From 0f98dc04d281c067a71fd7f169502cb8014a4f2f Mon Sep 17 00:00:00 2001 From: Simon Andersen Date: Thu, 5 Mar 2020 18:01:41 -0600 Subject: [PATCH 001/330] Confusion matrix in cupy/cython draft --- cpp/include/cuml/metrics/metrics.hpp | 13 +++ cpp/src/metrics/metrics.cu | 13 +++ python/cuml/metrics/confusion_matrix.pyx | 136 +++++++++++++++++++++++ 3 files changed, 162 insertions(+) create mode 100644 python/cuml/metrics/confusion_matrix.pyx diff --git a/cpp/include/cuml/metrics/metrics.hpp b/cpp/include/cuml/metrics/metrics.hpp index 1c011f5626..df40e8d966 100644 --- a/cpp/include/cuml/metrics/metrics.hpp +++ b/cpp/include/cuml/metrics/metrics.hpp @@ -251,5 +251,18 @@ double vMeasure(const cumlHandle &handle, const int *y, const int *y_hat, */ float accuracy_score_py(const cumlHandle &handle, const int *predictions, const int *ref_predictions, int n); + +/** +* Computes the contingency matrix +* +* @param handle: cumlHandle +* @param groundTruth: Array of ground truth labeling +* @param predictedLabel: Array of predicted labeling +* @param nSamples: Number of elements in the labelings +* @return: The entropy value of the clustering +*/ +void contingencyMatrix(const cumlHandle &handle, + const int *groundTruth, const int *predictedLabel, + const int nSamples, int *outMat); } // namespace Metrics } // namespace ML diff --git a/cpp/src/metrics/metrics.cu b/cpp/src/metrics/metrics.cu index 209283e821..80b959b9bc 100644 --- a/cpp/src/metrics/metrics.cu +++ b/cpp/src/metrics/metrics.cu @@ -87,6 +87,19 @@ double entropy(const cumlHandle &handle, const int *y, const int n, handle.getStream()); } +void contingencyMatrix(const cumlHandle &handle, + const int *groundTruth, const int *predictedLabel, + const int nSamples, int *outMat) { + size_t workspaceSz = MLCommon::Metrics::getContingencyMatrixWorkspaceSize( + size, firstClusterArray, stream, lowerLabelRange, upperLabelRange); + device_buffer pWorkspace( + handle.getDeviceAllocator(), stream, workspaceSz); + + return 
MLCommon::Metrics::contingencyMatrix( + groundTruth, predictedLabel, (int)nSamples, outMat, handle.getStream(), + (void *)pWorkspace.data(), workspaceSz); +} + double mutualInfoScore(const cumlHandle &handle, const int *y, const int *y_hat, const int n, const int lower_class_range, const int upper_class_range) { diff --git a/python/cuml/metrics/confusion_matrix.pyx b/python/cuml/metrics/confusion_matrix.pyx new file mode 100644 index 0000000000..70488e9273 --- /dev/null +++ b/python/cuml/metrics/confusion_matrix.pyx @@ -0,0 +1,136 @@ +# +# Copyright (c) 2020, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# cython: profile=False +# distutils: language = c++ +# cython: embedsignature = True +# cython: language_level = 3 + +import numpy as np +import cupy as cp + +from cuml.utils import input_to_cuml_array +from cuml.common.handle cimport cumlHandle +import cuml.common.handle + +from cuml.utils.memory_utils import with_cupy_rmm + + +cdef extern from "cuml/metrics/metrics.hpp" namespace "ML::Metrics": + void contingencyMatrix(const cumlHandle &handle, + const int *groundTruth, const int *predictedLabel, + const int nSamples, int *outMat) except + + + +@with_cupy_rmm +def confusion_matrix(y_true, y_pred, + labels=None, + sample_weight=None, + normalize=None): + """Compute confusion matrix to evaluate the accuracy of a classification. 
+ + Parameters + ---------- + y_true : array-like (device or host) shape = (n_samples,) + or (n_samples, n_outputs) + Ground truth (correct) target values. + y_pred : array-like (device or host) shape = (n_samples,) + or (n_samples, n_outputs) + Estimated target values. + labels : array-like (device or host) shape = (n_classes,), optional + List of labels to index the matrix. This may be used to reorder or + select a subset of labels. If None is given, those that appear at least + once in y_true or y_pred are used in sorted order. + sample_weight : array-like (device or host) shape = (n_samples,), optional + Sample weights. + normalize : string in [‘true’, ‘pred’, ‘all’] + Normalizes confusion matrix over the true (rows), predicted (columns) + conditions or all the population. If None, confusion matrix will not be + normalized. + + Returns + ------- + C : array-like (device or host) shape = (n_classes, n_classes) + Confusion matrix. + """ + handle = cuml.common.handle.Handle() \ + if handle is None else handle + cdef cumlHandle* handle_ =\ + handle.getHandle() + + y_true, n_rows, n_cols, dtype = \ + input_to_cuml_array(y_true, check_dtype=[np.int32, np.int64]) + + y_pred, _, _, _ = \ + input_to_cuml_array(y_pred, check_dtype=dtype, + check_rows=n_rows, check_cols=n_cols) + + if labels is None: + labels = unique_labels(y_true, y_pred) + else: + labels, n_labels, _, _ = \ + input_to_cuml_array(labels, check_dtype=dtype, check_cols=1) + if cp.all([l not in y_true for l in labels]): + raise ValueError("At least one label specified must be in y_true") + + if sample_weight is None: + sample_weight = cp.ones(n_rows, dtype=dtype) + else: + sample_weight, _, _, _ = \ + input_to_cuml_array(sample_weight, check_dtype=dtype, + check_rows=n_rows, check_cols=n_cols) + + if normalize not in ['true', 'pred', 'all', None]: + raise ValueError("normalize must be one of {'true', 'pred', " + "'all', None}") + + label_to_ind = {y: x for x, y in enumerate(labels)} + + y_pred = 
cp.array([label_to_ind.get(x, n_labels + 1) for x in y_pred]) + y_true = cp.array([label_to_ind.get(x, n_labels + 1) for x in y_true]) + + # intersect y_pred, y_true with labels, eliminate items not in labels + ind = cp.logical_and(y_pred < n_labels, y_true < n_labels) + y_pred = y_pred[ind] + y_true = y_true[ind] + sample_weight = sample_weight[ind] + + # Choose the accumulator dtype to always have high precision + if sample_weight.dtype.kind in {'i', 'u', 'b'}: + dtype = np.int64 + else: + dtype = np.float64 + + cm = None # TODO + contingencyMatrix(handle_[0], + ground_truth_ptr, + preds_ptr, + n_rows, + cm) + # cm = coo_matrix((sample_weight, (y_true, y_pred)), + # shape=(n_labels, n_labels), dtype=dtype, + # ).toarray() + + with np.errstate(all='ignore'): + if normalize == 'true': + cm = cm / cm.sum(axis=1, keepdims=True) + elif normalize == 'pred': + cm = cm / cm.sum(axis=0, keepdims=True) + elif normalize == 'all': + cm = cm / cm.sum() + cm = np.nan_to_num(cm) + + return cm \ No newline at end of file From ca7e1aafeead14b4418ab087608957cefaead501 Mon Sep 17 00:00:00 2001 From: Simon Andersen Date: Fri, 6 Mar 2020 13:14:32 -0600 Subject: [PATCH 002/330] Confusion matrix code in cupy and cuda for benchmarking --- python/cuml/metrics/confusion_matrix.pyx | 73 ++++++++++++++++-------- python/cuml/metrics/utils.py | 30 ++++++++++ 2 files changed, 80 insertions(+), 23 deletions(-) create mode 100644 python/cuml/metrics/utils.py diff --git a/python/cuml/metrics/confusion_matrix.pyx b/python/cuml/metrics/confusion_matrix.pyx index 70488e9273..5912480150 100644 --- a/python/cuml/metrics/confusion_matrix.pyx +++ b/python/cuml/metrics/confusion_matrix.pyx @@ -22,11 +22,14 @@ import numpy as np import cupy as cp +from libc.stdint cimport uintptr_t + +import cuml.common.handle from cuml.utils import input_to_cuml_array from cuml.common.handle cimport cumlHandle -import cuml.common.handle - from cuml.utils.memory_utils import with_cupy_rmm +from cuml.common import 
CumlArray +from cuml.metrics.utils import sorted_unique_labels cdef extern from "cuml/metrics/metrics.hpp" namespace "ML::Metrics": @@ -39,7 +42,9 @@ cdef extern from "cuml/metrics/metrics.hpp" namespace "ML::Metrics": def confusion_matrix(y_true, y_pred, labels=None, sample_weight=None, - normalize=None): + normalize=None, + use_cuda_coo=False, + handle=None): """Compute confusion matrix to evaluate the accuracy of a classification. Parameters @@ -60,6 +65,17 @@ def confusion_matrix(y_true, y_pred, Normalizes confusion matrix over the true (rows), predicted (columns) conditions or all the population. If None, confusion matrix will not be normalized. + use_cuda_coo : bool, optional + Whether or not to use our cuda version of the coo function. + If false, will use cupy's version of coo. + Used for benchmarks. + handle : cuml.Handle + Specifies the cuml.handle that holds internal CUDA state for + computations in this function. Most importantly, this specifies the + CUDA stream that will be used for this function's computations, so + users can run different computations concurrently in different streams + by creating handles in several streams. + If it is None, a new one is created. 
Returns ------- @@ -72,14 +88,14 @@ def confusion_matrix(y_true, y_pred, handle.getHandle() y_true, n_rows, n_cols, dtype = \ - input_to_cuml_array(y_true, check_dtype=[np.int32, np.int64]) + input_to_cuml_array(y_true, check_dtype=[cp.int32, cp.int64]) y_pred, _, _, _ = \ input_to_cuml_array(y_pred, check_dtype=dtype, check_rows=n_rows, check_cols=n_cols) if labels is None: - labels = unique_labels(y_true, y_pred) + labels = sorted_unique_labels(y_true, y_pred) else: labels, n_labels, _, _ = \ input_to_cuml_array(labels, check_dtype=dtype, check_cols=1) @@ -89,6 +105,9 @@ def confusion_matrix(y_true, y_pred, if sample_weight is None: sample_weight = cp.ones(n_rows, dtype=dtype) else: + if use_cuda_coo: + raise NotImplementedError("Sample weights not implemented with " + "cuda coo.") sample_weight, _, _, _ = \ input_to_cuml_array(sample_weight, check_dtype=dtype, check_rows=n_rows, check_cols=n_cols) @@ -109,28 +128,36 @@ def confusion_matrix(y_true, y_pred, sample_weight = sample_weight[ind] # Choose the accumulator dtype to always have high precision - if sample_weight.dtype.kind in {'i', 'u', 'b'}: - dtype = np.int64 + if dtype.kind in {'i', 'u', 'b'}: + dtype = cp.int64 + else: + dtype = cp.float64 + + if use_cuda_coo: + cm = CumlArray.zeros(shape=(n_labels, n_labels), dtype=dtype, + order='C') + cdef uintptr_t cm_ptr = cm.ptr + cdef uintptr_t y_pred_ptr = y_pred.ptr + cdef uintptr_t y_true_ptr = y_true.ptr + + contingencyMatrix(handle_[0], + y_true_ptr, + y_pred_ptr, + n_rows, + cm_ptr) + # TODO: Implement weighting else: - dtype = np.float64 - - cm = None # TODO - contingencyMatrix(handle_[0], - ground_truth_ptr, - preds_ptr, - n_rows, - cm) - # cm = coo_matrix((sample_weight, (y_true, y_pred)), - # shape=(n_labels, n_labels), dtype=dtype, - # ).toarray() + cm = cp.sparse.coo_matrix((sample_weight, (y_true, y_pred)), + shape=(n_labels, n_labels), dtype=dtype, + ).toarray() with np.errstate(all='ignore'): if normalize == 'true': - cm = cm / cm.sum(axis=1, 
keepdims=True) + cm = cp.divide(cm, cm.sum(axis=1, keepdims=True)) elif normalize == 'pred': - cm = cm / cm.sum(axis=0, keepdims=True) + cm = cp.divide(cm, cm.sum(axis=0, keepdims=True)) elif normalize == 'all': - cm = cm / cm.sum() - cm = np.nan_to_num(cm) + cm = cp.divide(cm, cm.sum()) + cm = cp.nan_to_num(cm) - return cm \ No newline at end of file + return cm diff --git a/python/cuml/metrics/utils.py b/python/cuml/metrics/utils.py new file mode 100644 index 0000000000..2d2fc5e833 --- /dev/null +++ b/python/cuml/metrics/utils.py @@ -0,0 +1,30 @@ +# +# Copyright (c) 2020, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +# cython: profile=False +# distutils: language = c++ +# cython: embedsignature = True +# cython: language_level = 3 + +import cupy as cp + + +def sorted_unique_labels(*ys): + """Extract an ordered array of unique labels from one or more arrays of + labels.""" + ys = (cp.unique(y) for y in ys) + labels = cp.unique(cp.concatenate(ys)) + return cp.sort(labels) From e07968d2b80848a50aada6d4351ed3d4f7eab008 Mon Sep 17 00:00:00 2001 From: Simon Andersen Date: Fri, 6 Mar 2020 17:54:08 -0600 Subject: [PATCH 003/330] Use make_monotonic instead of CPU operations --- cpp/include/cuml/metrics/metrics.hpp | 13 ---- cpp/src/metrics/metrics.cu | 13 ---- python/cuml/metrics/__init__.py | 1 + ...nfusion_matrix.pyx => confusion_matrix.py} | 78 ++++--------------- python/cuml/test/test_metrics.py | 12 +++ 5 files changed, 28 insertions(+), 89 deletions(-) rename python/cuml/metrics/{confusion_matrix.pyx => confusion_matrix.py} (57%) diff --git a/cpp/include/cuml/metrics/metrics.hpp b/cpp/include/cuml/metrics/metrics.hpp index df40e8d966..1c011f5626 100644 --- a/cpp/include/cuml/metrics/metrics.hpp +++ b/cpp/include/cuml/metrics/metrics.hpp @@ -251,18 +251,5 @@ double vMeasure(const cumlHandle &handle, const int *y, const int *y_hat, */ float accuracy_score_py(const cumlHandle &handle, const int *predictions, const int *ref_predictions, int n); - -/** -* Computes the contingency matrix -* -* @param handle: cumlHandle -* @param groundTruth: Array of ground truth labeling -* @param predictedLabel: Array of predicted labeling -* @param nSamples: Number of elements in the labelings -* @return: The entropy value of the clustering -*/ -void contingencyMatrix(const cumlHandle &handle, - const int *groundTruth, const int *predictedLabel, - const int nSamples, int *outMat); } // namespace Metrics } // namespace ML diff --git a/cpp/src/metrics/metrics.cu b/cpp/src/metrics/metrics.cu index 80b959b9bc..209283e821 100644 --- a/cpp/src/metrics/metrics.cu +++ b/cpp/src/metrics/metrics.cu @@ 
-87,19 +87,6 @@ double entropy(const cumlHandle &handle, const int *y, const int n, handle.getStream()); } -void contingencyMatrix(const cumlHandle &handle, - const int *groundTruth, const int *predictedLabel, - const int nSamples, int *outMat) { - size_t workspaceSz = MLCommon::Metrics::getContingencyMatrixWorkspaceSize( - size, firstClusterArray, stream, lowerLabelRange, upperLabelRange); - device_buffer pWorkspace( - handle.getDeviceAllocator(), stream, workspaceSz); - - return MLCommon::Metrics::contingencyMatrix( - groundTruth, predictedLabel, (int)nSamples, outMat, handle.getStream(), - (void *)pWorkspace.data(), workspaceSz); -} - double mutualInfoScore(const cumlHandle &handle, const int *y, const int *y_hat, const int n, const int lower_class_range, const int upper_class_range) { diff --git a/python/cuml/metrics/__init__.py b/python/cuml/metrics/__init__.py index cacc2be873..c7b24b1a53 100644 --- a/python/cuml/metrics/__init__.py +++ b/python/cuml/metrics/__init__.py @@ -19,3 +19,4 @@ from cuml.metrics.regression import mean_squared_error from cuml.metrics.accuracy import accuracy_score from cuml.metrics.cluster.adjustedrandindex import adjusted_rand_score +from cuml.metrics.confusion_matrix import confusion_matrix \ No newline at end of file diff --git a/python/cuml/metrics/confusion_matrix.pyx b/python/cuml/metrics/confusion_matrix.py similarity index 57% rename from python/cuml/metrics/confusion_matrix.pyx rename to python/cuml/metrics/confusion_matrix.py index 5912480150..3d27124457 100644 --- a/python/cuml/metrics/confusion_matrix.pyx +++ b/python/cuml/metrics/confusion_matrix.py @@ -22,29 +22,17 @@ import numpy as np import cupy as cp -from libc.stdint cimport uintptr_t - -import cuml.common.handle from cuml.utils import input_to_cuml_array -from cuml.common.handle cimport cumlHandle from cuml.utils.memory_utils import with_cupy_rmm -from cuml.common import CumlArray from cuml.metrics.utils import sorted_unique_labels - - -cdef extern from 
"cuml/metrics/metrics.hpp" namespace "ML::Metrics": - void contingencyMatrix(const cumlHandle &handle, - const int *groundTruth, const int *predictedLabel, - const int nSamples, int *outMat) except + +from cuml.prims.label import make_monotonic @with_cupy_rmm def confusion_matrix(y_true, y_pred, labels=None, sample_weight=None, - normalize=None, - use_cuda_coo=False, - handle=None): + normalize=None): """Compute confusion matrix to evaluate the accuracy of a classification. Parameters @@ -65,61 +53,42 @@ def confusion_matrix(y_true, y_pred, Normalizes confusion matrix over the true (rows), predicted (columns) conditions or all the population. If None, confusion matrix will not be normalized. - use_cuda_coo : bool, optional - Whether or not to use our cuda version of the coo function. - If false, will use cupy's version of coo. - Used for benchmarks. - handle : cuml.Handle - Specifies the cuml.handle that holds internal CUDA state for - computations in this function. Most importantly, this specifies the - CUDA stream that will be used for this function's computations, so - users can run different computations concurrently in different streams - by creating handles in several streams. - If it is None, a new one is created. Returns ------- C : array-like (device or host) shape = (n_classes, n_classes) Confusion matrix. 
""" - handle = cuml.common.handle.Handle() \ - if handle is None else handle - cdef cumlHandle* handle_ =\ - handle.getHandle() - y_true, n_rows, n_cols, dtype = \ input_to_cuml_array(y_true, check_dtype=[cp.int32, cp.int64]) + y_true = y_true.to_output('cupy') y_pred, _, _, _ = \ input_to_cuml_array(y_pred, check_dtype=dtype, check_rows=n_rows, check_cols=n_cols) + y_pred = y_pred.to_output('cupy') if labels is None: labels = sorted_unique_labels(y_true, y_pred) + n_labels = len(labels) else: labels, n_labels, _, _ = \ input_to_cuml_array(labels, check_dtype=dtype, check_cols=1) - if cp.all([l not in y_true for l in labels]): - raise ValueError("At least one label specified must be in y_true") - if sample_weight is None: sample_weight = cp.ones(n_rows, dtype=dtype) else: - if use_cuda_coo: - raise NotImplementedError("Sample weights not implemented with " - "cuda coo.") sample_weight, _, _, _ = \ input_to_cuml_array(sample_weight, check_dtype=dtype, check_rows=n_rows, check_cols=n_cols) + sample_weight = sample_weight.to_output('cupy') + print(1) if normalize not in ['true', 'pred', 'all', None]: raise ValueError("normalize must be one of {'true', 'pred', " "'all', None}") - label_to_ind = {y: x for x, y in enumerate(labels)} - - y_pred = cp.array([label_to_ind.get(x, n_labels + 1) for x in y_pred]) - y_true = cp.array([label_to_ind.get(x, n_labels + 1) for x in y_true]) + y_true, _ = make_monotonic(y_true, labels, copy=True) + y_pred, _ = make_monotonic(y_pred, labels, copy=True) # intersect y_pred, y_true with labels, eliminate items not in labels ind = cp.logical_and(y_pred < n_labels, y_true < n_labels) @@ -127,29 +96,12 @@ def confusion_matrix(y_true, y_pred, y_true = y_true[ind] sample_weight = sample_weight[ind] - # Choose the accumulator dtype to always have high precision - if dtype.kind in {'i', 'u', 'b'}: - dtype = cp.int64 - else: - dtype = cp.float64 - - if use_cuda_coo: - cm = CumlArray.zeros(shape=(n_labels, n_labels), dtype=dtype, - order='C') - 
cdef uintptr_t cm_ptr = cm.ptr - cdef uintptr_t y_pred_ptr = y_pred.ptr - cdef uintptr_t y_true_ptr = y_true.ptr - - contingencyMatrix(handle_[0], - y_true_ptr, - y_pred_ptr, - n_rows, - cm_ptr) - # TODO: Implement weighting - else: - cm = cp.sparse.coo_matrix((sample_weight, (y_true, y_pred)), - shape=(n_labels, n_labels), dtype=dtype, - ).toarray() + cm = cp.sparse.coo_matrix( + (sample_weight, (y_true, y_pred)), + shape=(n_labels, n_labels), + # Choosing accumulator dtype to always have high precision + dtype=np.int64 if dtype.kind in {'i', 'u', 'b'} else np.float64, + ).toarray() with np.errstate(all='ignore'): if normalize == 'true': diff --git a/python/cuml/test/test_metrics.py b/python/cuml/test/test_metrics.py index 56824d7d0f..bfab1ca945 100644 --- a/python/cuml/test/test_metrics.py +++ b/python/cuml/test/test_metrics.py @@ -36,6 +36,8 @@ from cuml.metrics.regression import mean_squared_error from sklearn.metrics.regression import mean_squared_error as sklearn_mse +from cuml.metrics import confusion_matrix + @pytest.mark.parametrize('datatype', [np.float32, np.float64]) @pytest.mark.parametrize('use_handle', [True, False]) @@ -217,3 +219,13 @@ def test_mean_squared_error_custom_weights(): skl_mse = sklearn_mse(y_true, y_pred, sample_weight=weights) assert_almost_equal(mse, skl_mse, decimal=2) + + +def test_confusion_matrix(): + y_true = cp.array([2, 0, 2, 2, 0, 1]) + y_pred = cp.array([0, 0, 2, 2, 0, 2]) + cm = confusion_matrix(y_true, y_pred) + ref = cp.array([[2, 0, 0], + [0, 0, 1], + [1, 0, 2]]) + cp.testing.assert_array_equal(cm, ref) From 15ec502b1c8434e288cf37c5652f4ae9ce9a8b87 Mon Sep 17 00:00:00 2001 From: Simon Andersen Date: Mon, 9 Mar 2020 10:18:55 -0500 Subject: [PATCH 004/330] Update CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8e6435fdcd..2db8be9326 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ ## New Features - PR #1742: Mean squared error implementation with 
cupy +- PR #1817: Confusion matrix implementation with cupy - PR #1635: cuML Array shim and configurable output added to cluster methods - PR #1586: Seasonal ARIMA - PR #1683: cuml.dask make_regression From 8fe1c0070db9a0ce566f8cd9a55902c18bd56a03 Mon Sep 17 00:00:00 2001 From: Simon Andersen Date: Mon, 9 Mar 2020 10:23:35 -0500 Subject: [PATCH 005/330] Improve code readability in confusion matrix --- python/cuml/metrics/confusion_matrix.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/python/cuml/metrics/confusion_matrix.py b/python/cuml/metrics/confusion_matrix.py index 3d27124457..d9a688b743 100644 --- a/python/cuml/metrics/confusion_matrix.py +++ b/python/cuml/metrics/confusion_matrix.py @@ -96,12 +96,12 @@ def confusion_matrix(y_true, y_pred, y_true = y_true[ind] sample_weight = sample_weight[ind] - cm = cp.sparse.coo_matrix( - (sample_weight, (y_true, y_pred)), - shape=(n_labels, n_labels), - # Choosing accumulator dtype to always have high precision - dtype=np.int64 if dtype.kind in {'i', 'u', 'b'} else np.float64, - ).toarray() + # Choosing accumulator dtype to always have high precision + dtype = np.int64 if dtype.kind in {'i', 'u', 'b'} else np.float64 + + cm = cp.sparse.coo_matrix((sample_weight, (y_true, y_pred)), + shape=(n_labels, n_labels), + dtype=dtype).toarray() with np.errstate(all='ignore'): if normalize == 'true': From 187377688a2a83482339ec25301ef828bbb7bbbb Mon Sep 17 00:00:00 2001 From: Simon Andersen Date: Tue, 10 Mar 2020 11:00:23 -0500 Subject: [PATCH 006/330] Add tests for each feature of confusion matrix --- python/cuml/test/test_metrics.py | 69 ++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) diff --git a/python/cuml/test/test_metrics.py b/python/cuml/test/test_metrics.py index bfab1ca945..f862d6cb6d 100644 --- a/python/cuml/test/test_metrics.py +++ b/python/cuml/test/test_metrics.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations 
under the License. # +from itertools import chain, permutations import cuml import cupy as cp @@ -35,6 +36,7 @@ from cuml.metrics.regression import mean_squared_error from sklearn.metrics.regression import mean_squared_error as sklearn_mse +from sklearn.metrics import confusion_matrix as sk_confusion_matrix from cuml.metrics import confusion_matrix @@ -229,3 +231,70 @@ def test_confusion_matrix(): [0, 0, 1], [1, 0, 2]]) cp.testing.assert_array_equal(cm, ref) + + +def test_confusion_matrix_binary(): + y_true = cp.array([0, 1, 0, 1]) + y_pred = cp.array([1, 1, 1, 0]) + tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel() + ref = cp.array([0, 2, 1, 1]) + cp.testing.assert_array_equal(ref, cp.array([tn, fp, fn, tp])) + + +@pytest.mark.parametrize('n_samples', [50, 3000, stress_param(500000)]) +@pytest.mark.parametrize('dtype', [np.int32, np.int64]) +@pytest.mark.parametrize('problem_type', ['binary', 'multiclass']) +def test_confusion_matrix_random(n_samples, dtype, problem_type): + upper_range = 1 if problem_type == 'binary' else 1000 + + y_true, y_pred = generate_random_labels( + lambda rng: rng.randint(0, upper_range, n_samples).astype(dtype)) + cm = confusion_matrix(y_true, y_pred) + ref = sk_confusion_matrix(y_true, y_pred) + cp.testing.assert_array_almost_equal(ref, cm, decimal=4) + + +@pytest.mark.parametrize( + "normalize, expected_results", + [('true', 0.333333333), + ('pred', 0.333333333), + ('all', 0.1111111111), + (None, 2)] +) +def test_confusion_matrix_normalize(normalize, expected_results): + y_test = cp.array([0, 1, 2] * 6) + y_pred = cp.array(list(chain(*permutations([0, 1, 2])))) + cm = confusion_matrix(y_test, y_pred, normalize=normalize) + cp.testing.assert_allclose(cm, cp.array(expected_results)) + + +@pytest.mark.xfail +@pytest.mark.parametrize('labels', [(0, 1), + (2, 1), + (2, 1, 4, 7), + (2, 20)]) +def test_confusion_matrix_multiclass_subset_labels(labels): + y_true, y_pred = generate_random_labels( + lambda rng: rng.randint(0, 3, 
10).astype(np.int32)) + + ref = sk_confusion_matrix(y_true, y_pred, labels=labels) + labels = cp.array(labels, dtype=np.int32) + cm = confusion_matrix(y_true, y_pred, labels=labels) + cp.testing.assert_array_almost_equal(ref, cm, decimal=4) + + +@pytest.mark.parametrize('n_samples', [50, 3000, stress_param(500000)]) +@pytest.mark.parametrize('dtype', [np.int32, np.int64]) +@pytest.mark.parametrize('weights_dtype', ['int', 'float']) +def test_confusion_matrix_random_weights(n_samples, dtype, weights_dtype): + y_true, y_pred = generate_random_labels( + lambda rng: rng.randint(0, 10, n_samples).astype(dtype)) + + if weights_dtype == 'int': + sample_weight = np.random.RandomState(0).randint(0, 10, n_samples) + else: + sample_weight = np.random.RandomState(0).rand(n_samples) + + cm = confusion_matrix(y_true, y_pred, sample_weight=sample_weight) + ref = sk_confusion_matrix(y_true, y_pred, sample_weight=sample_weight) + cp.testing.assert_array_almost_equal(ref, cm, decimal=4) From 87d8e7de9a507d77cc6175dcc23fa8b70b7d97ae Mon Sep 17 00:00:00 2001 From: Simon Andersen Date: Tue, 10 Mar 2020 11:00:43 -0500 Subject: [PATCH 007/330] Fix confusion matrix according to tests --- python/cuml/metrics/confusion_matrix.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/python/cuml/metrics/confusion_matrix.py b/python/cuml/metrics/confusion_matrix.py index d9a688b743..f2e133e0b4 100644 --- a/python/cuml/metrics/confusion_matrix.py +++ b/python/cuml/metrics/confusion_matrix.py @@ -74,14 +74,16 @@ def confusion_matrix(y_true, y_pred, else: labels, n_labels, _, _ = \ input_to_cuml_array(labels, check_dtype=dtype, check_cols=1) + labels = labels.to_output('cupy') if sample_weight is None: sample_weight = cp.ones(n_rows, dtype=dtype) else: sample_weight, _, _, _ = \ - input_to_cuml_array(sample_weight, check_dtype=dtype, + input_to_cuml_array(sample_weight, + check_dtype=[cp.float32, cp.float64, + cp.int32, cp.int64], check_rows=n_rows, check_cols=n_cols) 
sample_weight = sample_weight.to_output('cupy') - print(1) if normalize not in ['true', 'pred', 'all', None]: raise ValueError("normalize must be one of {'true', 'pred', " @@ -96,12 +98,9 @@ def confusion_matrix(y_true, y_pred, y_true = y_true[ind] sample_weight = sample_weight[ind] - # Choosing accumulator dtype to always have high precision - dtype = np.int64 if dtype.kind in {'i', 'u', 'b'} else np.float64 - cm = cp.sparse.coo_matrix((sample_weight, (y_true, y_pred)), shape=(n_labels, n_labels), - dtype=dtype).toarray() + dtype=np.float64).toarray() with np.errstate(all='ignore'): if normalize == 'true': From e071a011875451c53df074a1808bc5d5b89664a7 Mon Sep 17 00:00:00 2001 From: Simon Andersen Date: Tue, 10 Mar 2020 12:15:48 -0500 Subject: [PATCH 008/330] Fix labels feature --- python/cuml/prims/label/classlabels.py | 3 ++- python/cuml/test/test_metrics.py | 1 - 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/python/cuml/prims/label/classlabels.py b/python/cuml/prims/label/classlabels.py index 2f4f088025..334792ab14 100644 --- a/python/cuml/prims/label/classlabels.py +++ b/python/cuml/prims/label/classlabels.py @@ -38,9 +38,10 @@ for(int i = 0; i < n_labels; i++) { if(label_cache[i] == unmapped_label) { x[tid] = i; - break; + return; } } + x[tid] = n_labels+1; } ''' diff --git a/python/cuml/test/test_metrics.py b/python/cuml/test/test_metrics.py index 7ff6956c30..cf26e41454 100644 --- a/python/cuml/test/test_metrics.py +++ b/python/cuml/test/test_metrics.py @@ -384,7 +384,6 @@ def test_confusion_matrix_normalize(normalize, expected_results): cp.testing.assert_allclose(cm, cp.array(expected_results)) -@pytest.mark.xfail @pytest.mark.parametrize('labels', [(0, 1), (2, 1), (2, 1, 4, 7), From 115c13b968610258e222e99c5ec331f9b64285a5 Mon Sep 17 00:00:00 2001 From: Simon Andersen Date: Tue, 10 Mar 2020 13:34:03 -0500 Subject: [PATCH 009/330] Add comment to make_manotonic to clarify previous change in the kernel --- python/cuml/prims/label/classlabels.py 
| 2 ++ 1 file changed, 2 insertions(+) diff --git a/python/cuml/prims/label/classlabels.py b/python/cuml/prims/label/classlabels.py index 334792ab14..abb381ea77 100644 --- a/python/cuml/prims/label/classlabels.py +++ b/python/cuml/prims/label/classlabels.py @@ -117,6 +117,8 @@ def make_monotonic(labels, classes=None, copy=False): set [0, n-1] and renumbers them to be drawn that interval. + Replaces labels not present in classes by len(classes)+1. + Parameters ---------- From 491e1747a3075cb16677dc654e4f0a3f3ca16c78 Mon Sep 17 00:00:00 2001 From: Simon Andersen Date: Wed, 11 Mar 2020 18:05:02 -0500 Subject: [PATCH 010/330] Fix confusion matrix binary test --- python/cuml/test/test_metrics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cuml/test/test_metrics.py b/python/cuml/test/test_metrics.py index cf26e41454..11c9705769 100644 --- a/python/cuml/test/test_metrics.py +++ b/python/cuml/test/test_metrics.py @@ -361,7 +361,7 @@ def test_confusion_matrix_binary(): @pytest.mark.parametrize('dtype', [np.int32, np.int64]) @pytest.mark.parametrize('problem_type', ['binary', 'multiclass']) def test_confusion_matrix_random(n_samples, dtype, problem_type): - upper_range = 1 if problem_type == 'binary' else 1000 + upper_range = 2 if problem_type == 'binary' else 1000 y_true, y_pred = generate_random_labels( lambda rng: rng.randint(0, upper_range, n_samples).astype(dtype)) From 4a93cb4087b3fb7779a1cb6fe168191e92486cff Mon Sep 17 00:00:00 2001 From: Simon Andersen Date: Wed, 11 Mar 2020 18:06:20 -0500 Subject: [PATCH 011/330] Add distributed version of confusion_matrix --- python/cuml/dask/metrics/__init__.py | 23 ++++++ python/cuml/dask/metrics/confusion_matrix.py | 86 ++++++++++++++++++++ python/cuml/dask/metrics/utils.py | 31 +++++++ 3 files changed, 140 insertions(+) create mode 100644 python/cuml/dask/metrics/__init__.py create mode 100644 python/cuml/dask/metrics/confusion_matrix.py create mode 100644 python/cuml/dask/metrics/utils.py diff 
--git a/python/cuml/dask/metrics/__init__.py b/python/cuml/dask/metrics/__init__.py new file mode 100644 index 0000000000..ac00348fed --- /dev/null +++ b/python/cuml/dask/metrics/__init__.py @@ -0,0 +1,23 @@ +# +# Copyright (c) 2020, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from cuml.utils.import_utils import has_dask +import warnings + +if has_dask(): + from cuml.dask.metrics.confusion_matrix import confusion_matrix +else: + warnings.warn("Dask not found. All Dask-based multi-GPU operation is disabed.") diff --git a/python/cuml/dask/metrics/confusion_matrix.py b/python/cuml/dask/metrics/confusion_matrix.py new file mode 100644 index 0000000000..a95d311cf3 --- /dev/null +++ b/python/cuml/dask/metrics/confusion_matrix.py @@ -0,0 +1,86 @@ +# +# Copyright (c) 2020, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import numpy as np +import cupy as cp +from cuml.dask.common import extract_arr_partitions + +from cuml.utils.memory_utils import with_cupy_rmm +from cuml.dask.metrics.utils import sorted_unique_labels +from cuml.prims.label import make_monotonic +from dask.distributed import default_client + + +@with_cupy_rmm +def local_cm(inputs, labels, use_sample_weight): + if use_sample_weight: + y_true, y_pred, sample_weight = inputs + else: + y_true, y_pred = inputs + sample_weight = cp.ones(y_true.shape[0], dtype=y_true.dtype) + + y_true, _ = make_monotonic(y_true, labels, copy=True) + y_pred, _ = make_monotonic(y_pred, labels, copy=True) + + n_labels = labels.size + + # intersect y_pred, y_true with labels, eliminate items not in labels + ind = cp.logical_and(y_pred < n_labels, y_true < n_labels) + y_pred = y_pred[ind] + y_true = y_true[ind] + sample_weight = sample_weight[ind] + cm = cp.sparse.coo_matrix((sample_weight, (y_true, y_pred)), + shape=(n_labels, n_labels), dtype=cp.float64, + ).toarray() + return cp.nan_to_num(cm) + + +@with_cupy_rmm +def confusion_matrix(y_true, y_pred, + labels=None, + normalize=None, + sample_weight=None): + client = default_client() + + if labels is None: + labels = sorted_unique_labels(y_true, y_pred) + + if normalize not in ['true', 'pred', 'all', None]: + raise ValueError("normalize must be one of {'true', 'pred', " + "'all', None}") + + use_sample_weight = bool(sample_weight is not None) + dask_arrays = [y_true, y_pred, sample_weight] if use_sample_weight else \ + [y_true, y_pred] + + # run cm computation on each partition. 
+ parts = client.sync(extract_arr_partitions, dask_arrays) + cms = [client.submit(local_cm, p, labels, use_sample_weight, + workers=[w]).result() for w, p in parts] + + # reduce each partition's result into one cupy matrix + cm = sum(cms) + + with np.errstate(all='ignore'): + if normalize == 'true': + cm = cm / cm.sum(axis=1, keepdims=True) + elif normalize == 'pred': + cm = cm / cm.sum(axis=0, keepdims=True) + elif normalize == 'all': + cm = cm / cm.sum() + cm = np.nan_to_num(cm) + + return cm diff --git a/python/cuml/dask/metrics/utils.py b/python/cuml/dask/metrics/utils.py new file mode 100644 index 0000000000..355425b0f9 --- /dev/null +++ b/python/cuml/dask/metrics/utils.py @@ -0,0 +1,31 @@ +# +# Copyright (c) 2020, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +# cython: profile=False +# distutils: language = c++ +# cython: embedsignature = True +# cython: language_level = 3 + +import cupy as cp + + +def sorted_unique_labels(*ys): + """Extract an ordered array of unique labels from one or more dask arrays + of labels.""" + ys = (cp.unique(y.map_blocks(lambda x: cp.unique(x)).compute()) + for y in ys) + labels = cp.unique(cp.concatenate(ys)) + return cp.sort(labels) From ff545620da91747134980efa9c7fe0711440c185 Mon Sep 17 00:00:00 2001 From: Simon Andersen Date: Wed, 11 Mar 2020 18:36:48 -0500 Subject: [PATCH 012/330] Add tests for MNMG confusion matrix --- python/cuml/test/dask/test_metrics.py | 150 ++++++++++++++++++++++++++ python/cuml/test/utils.py | 14 ++- 2 files changed, 162 insertions(+), 2 deletions(-) create mode 100644 python/cuml/test/dask/test_metrics.py diff --git a/python/cuml/test/dask/test_metrics.py b/python/cuml/test/dask/test_metrics.py new file mode 100644 index 0000000000..914d414947 --- /dev/null +++ b/python/cuml/test/dask/test_metrics.py @@ -0,0 +1,150 @@ +# +# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +from itertools import chain, permutations + +import numpy as np +import cupy as cp +import pytest +from dask.distributed import Client +from sklearn.metrics import confusion_matrix as sk_confusion_matrix +from cuml.test.utils import stress_param, generate_random_labels + + +@pytest.mark.mg +def test_confusion_matrix(cluster): + client = Client(cluster) + from cuml.dask.metrics import confusion_matrix + import dask.array as da + + y_true = da.from_array(cp.array([2, 0, 2, 2, 0, 1])) + y_pred = da.from_array(cp.array([0, 0, 2, 2, 0, 2])) + cm = confusion_matrix(y_true, y_pred) + ref = cp.array([[2, 0, 0], + [0, 0, 1], + [1, 0, 2]]) + cp.testing.assert_array_equal(cm, ref) + + client.close() + + +@pytest.mark.mg +def test_confusion_matrix_binary(cluster): + client = Client(cluster) + from cuml.dask.metrics import confusion_matrix + import dask.array as da + y_true = da.from_array(cp.array([0, 1, 0, 1])) + y_pred = da.from_array(cp.array([1, 1, 1, 0])) + tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel() + ref = cp.array([0, 2, 1, 1]) + cp.testing.assert_array_equal(ref, cp.array([tn, fp, fn, tp])) + client.close() + + +@pytest.mark.mg +@pytest.mark.parametrize('n_samples', [50, 3000, stress_param(500000)]) +@pytest.mark.parametrize('dtype', [np.int32, np.int64]) +@pytest.mark.parametrize('problem_type', ['binary', 'multiclass']) +def test_confusion_matrix_random(n_samples, dtype, problem_type, cluster): + client = Client(cluster) + from cuml.dask.metrics import confusion_matrix + import dask.array as da + upper_range = 2 if problem_type == 'binary' else 1000 + + y_true, y_pred, np_y_true, np_y_pred = generate_random_labels( + lambda rng: rng.randint(0, upper_range, n_samples).astype(dtype), + as_cupy=True, + with_numpy=True + ) + + y_true, y_pred = da.from_array(y_true), da.from_array(y_pred) + cm = confusion_matrix(y_true, y_pred) + ref = sk_confusion_matrix(np_y_true, np_y_pred) + cp.testing.assert_array_almost_equal(ref, cm, decimal=4) + client.close() + 
+ +@pytest.mark.mg +@pytest.mark.parametrize( + "normalize, expected_results", + [('true', 0.333333333), + ('pred', 0.333333333), + ('all', 0.1111111111), + (None, 2)] +) +def test_confusion_matrix_normalize(normalize, expected_results, cluster): + client = Client(cluster) + from cuml.dask.metrics import confusion_matrix + import dask.array as da + y_test = da.from_array(cp.array([0, 1, 2] * 6)) + y_pred = da.from_array(cp.array(list(chain(*permutations([0, 1, 2]))))) + cm = confusion_matrix(y_test, y_pred, normalize=normalize) + cp.testing.assert_allclose(cm, cp.array(expected_results)) + client.close() + + +@pytest.mark.mg +@pytest.mark.parametrize('labels', [(0, 1), + (2, 1), + (2, 1, 4, 7), + (2, 20)]) +def test_confusion_matrix_multiclass_subset_labels(labels, cluster): + client = Client(cluster) + from cuml.dask.metrics import confusion_matrix + import dask.array as da + y_true, y_pred, np_y_true, np_y_pred = generate_random_labels( + lambda rng: rng.randint(0, 3, 10).astype(np.int32), + as_cupy=True, + with_numpy=True + ) + y_true, y_pred = da.from_array(y_true), da.from_array(y_pred) + + ref = sk_confusion_matrix(np_y_true, np_y_pred, labels=labels) + labels = cp.array(labels, dtype=np.int32) + cm = confusion_matrix(y_true, y_pred, labels=labels) + cp.testing.assert_array_almost_equal(ref, cm, decimal=4) + client.close() + + +@pytest.mark.mg +@pytest.mark.parametrize('n_samples', [50, 3000, stress_param(500000)]) +@pytest.mark.parametrize('dtype', [np.int32, np.int64]) +@pytest.mark.parametrize('weights_dtype', ['int', 'float']) +def test_confusion_matrix_random_weights(n_samples, dtype, weights_dtype, + cluster): + client = Client(cluster) + from cuml.dask.metrics import confusion_matrix + import dask.array as da + y_true, y_pred, np_y_true, np_y_pred = generate_random_labels( + lambda rng: rng.randint(0, 10, n_samples).astype(dtype), + as_cupy=True, + with_numpy=True + ) + y_true, y_pred = da.from_array(y_true), da.from_array(y_pred) + + if weights_dtype 
== 'int': + sample_weight = np.random.RandomState(0).randint(0, 10, n_samples) + else: + sample_weight = np.random.RandomState(0).rand(n_samples) + + ref = sk_confusion_matrix(np_y_true, np_y_pred, + sample_weight=sample_weight) + + sample_weight = cp.array(sample_weight) + sample_weight = da.from_array(sample_weight) + + cm = confusion_matrix(y_true, y_pred, sample_weight=sample_weight) + cp.testing.assert_array_almost_equal(ref, cm, decimal=4) + client.close() diff --git a/python/cuml/test/utils.py b/python/cuml/test/utils.py index 3dc721ec99..a08a65b820 100644 --- a/python/cuml/test/utils.py +++ b/python/cuml/test/utils.py @@ -216,9 +216,19 @@ def get_classes_from_package(package): return {k: v for dictionary in classes for k, v in dictionary.items()} -def generate_random_labels(random_generation_lambda, seed=1234): +def generate_random_labels(random_generation_lambda, seed=1234, as_cupy=False, + with_numpy=False): + # TODO: Add documentation rng = np.random.RandomState(seed) # makes it reproducible a = random_generation_lambda(rng) b = random_generation_lambda(rng) - return cuda.to_device(a), cuda.to_device(b) + if as_cupy: + a_res, b_res = cp.array(a), cp.array(b) + else: + a_res, b_res = cuda.to_device(a), cuda.to_device(b) + + if with_numpy: + return a_res, b_res, a, b + else: + return a_res, b_res From f60ee070c5620ec0c1e7b6314d5605e1357cf192 Mon Sep 17 00:00:00 2001 From: Simon Andersen Date: Thu, 12 Mar 2020 10:29:02 -0500 Subject: [PATCH 013/330] Clean up of MNMG tests and testing with different chunk sizes --- python/cuml/test/dask/test_metrics.py | 29 +++++++++------------------ 1 file changed, 10 insertions(+), 19 deletions(-) diff --git a/python/cuml/test/dask/test_metrics.py b/python/cuml/test/dask/test_metrics.py index 914d414947..8dd9538a8f 100644 --- a/python/cuml/test/dask/test_metrics.py +++ b/python/cuml/test/dask/test_metrics.py @@ -21,16 +21,16 @@ from dask.distributed import Client from sklearn.metrics import confusion_matrix as 
sk_confusion_matrix from cuml.test.utils import stress_param, generate_random_labels +from cuml.dask.metrics import confusion_matrix +import dask.array as da @pytest.mark.mg -def test_confusion_matrix(cluster): +@pytest.mark.parametrize('chunks', ['auto', 2, 1]) +def test_confusion_matrix(cluster, chunks): client = Client(cluster) - from cuml.dask.metrics import confusion_matrix - import dask.array as da - - y_true = da.from_array(cp.array([2, 0, 2, 2, 0, 1])) - y_pred = da.from_array(cp.array([0, 0, 2, 2, 0, 2])) + y_true = da.from_array(cp.array([2, 0, 2, 2, 0, 1]), chunks=chunks) + y_pred = da.from_array(cp.array([0, 0, 2, 2, 0, 2]), chunks=chunks) cm = confusion_matrix(y_true, y_pred) ref = cp.array([[2, 0, 0], [0, 0, 1], @@ -41,12 +41,11 @@ def test_confusion_matrix(cluster): @pytest.mark.mg -def test_confusion_matrix_binary(cluster): +@pytest.mark.parametrize('chunks', ['auto', 2, 1]) +def test_confusion_matrix_binary(cluster, chunks): client = Client(cluster) - from cuml.dask.metrics import confusion_matrix - import dask.array as da - y_true = da.from_array(cp.array([0, 1, 0, 1])) - y_pred = da.from_array(cp.array([1, 1, 1, 0])) + y_true = da.from_array(cp.array([0, 1, 0, 1]), chunks=chunks) + y_pred = da.from_array(cp.array([1, 1, 1, 0]), chunks=chunks) tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel() ref = cp.array([0, 2, 1, 1]) cp.testing.assert_array_equal(ref, cp.array([tn, fp, fn, tp])) @@ -59,8 +58,6 @@ def test_confusion_matrix_binary(cluster): @pytest.mark.parametrize('problem_type', ['binary', 'multiclass']) def test_confusion_matrix_random(n_samples, dtype, problem_type, cluster): client = Client(cluster) - from cuml.dask.metrics import confusion_matrix - import dask.array as da upper_range = 2 if problem_type == 'binary' else 1000 y_true, y_pred, np_y_true, np_y_pred = generate_random_labels( @@ -86,8 +83,6 @@ def test_confusion_matrix_random(n_samples, dtype, problem_type, cluster): ) def test_confusion_matrix_normalize(normalize, 
expected_results, cluster): client = Client(cluster) - from cuml.dask.metrics import confusion_matrix - import dask.array as da y_test = da.from_array(cp.array([0, 1, 2] * 6)) y_pred = da.from_array(cp.array(list(chain(*permutations([0, 1, 2]))))) cm = confusion_matrix(y_test, y_pred, normalize=normalize) @@ -102,8 +97,6 @@ def test_confusion_matrix_normalize(normalize, expected_results, cluster): (2, 20)]) def test_confusion_matrix_multiclass_subset_labels(labels, cluster): client = Client(cluster) - from cuml.dask.metrics import confusion_matrix - import dask.array as da y_true, y_pred, np_y_true, np_y_pred = generate_random_labels( lambda rng: rng.randint(0, 3, 10).astype(np.int32), as_cupy=True, @@ -125,8 +118,6 @@ def test_confusion_matrix_multiclass_subset_labels(labels, cluster): def test_confusion_matrix_random_weights(n_samples, dtype, weights_dtype, cluster): client = Client(cluster) - from cuml.dask.metrics import confusion_matrix - import dask.array as da y_true, y_pred, np_y_true, np_y_pred = generate_random_labels( lambda rng: rng.randint(0, 10, n_samples).astype(dtype), as_cupy=True, From 691803befd41b4eec94000e95feb953028d906c9 Mon Sep 17 00:00:00 2001 From: Simon Andersen Date: Fri, 13 Mar 2020 13:28:28 -0500 Subject: [PATCH 014/330] Fix PR according to review --- python/cuml/dask/metrics/confusion_matrix.py | 42 +++++++++++++++++--- python/cuml/metrics/confusion_matrix.py | 5 ++- python/cuml/test/dask/test_metrics.py | 17 +++----- python/cuml/test/test_metrics.py | 10 ++--- python/cuml/test/utils.py | 40 ++++++++++++++----- 5 files changed, 80 insertions(+), 34 deletions(-) diff --git a/python/cuml/dask/metrics/confusion_matrix.py b/python/cuml/dask/metrics/confusion_matrix.py index a95d311cf3..79e48666d6 100644 --- a/python/cuml/dask/metrics/confusion_matrix.py +++ b/python/cuml/dask/metrics/confusion_matrix.py @@ -25,7 +25,7 @@ @with_cupy_rmm -def local_cm(inputs, labels, use_sample_weight): +def _local_cm(inputs, labels, use_sample_weight): if 
use_sample_weight: y_true, y_pred, sample_weight = inputs else: @@ -52,15 +52,45 @@ def local_cm(inputs, labels, use_sample_weight): def confusion_matrix(y_true, y_pred, labels=None, normalize=None, - sample_weight=None): - client = default_client() + sample_weight=None, + client=None): + """Compute confusion matrix to evaluate the accuracy of a classification. + + Parameters + ---------- + y_true : array-like (device or host) shape = (n_samples,) + or (n_samples, n_outputs) + Ground truth (correct) target values. + y_pred : array-like (device or host) shape = (n_samples,) + or (n_samples, n_outputs) + Estimated target values. + labels : array-like (device or host) shape = (n_classes,), optional + List of labels to index the matrix. This may be used to reorder or + select a subset of labels. If None is given, those that appear at least + once in y_true or y_pred are used in sorted order. + sample_weight : array-like (device or host) shape = (n_samples,), optional + Sample weights. + normalize : string in [‘true’, ‘pred’, ‘all’] + Normalizes confusion matrix over the true (rows), predicted (columns) + conditions or all the population. If None, confusion matrix will not be + normalized. + client : dask.distributed.Client, optional + Dask client to use. Will use the default client if None. + + Returns + ------- + C : array-like (device or host) shape = (n_classes, n_classes) + Confusion matrix. + """ + client = default_client() if client is None else client if labels is None: labels = sorted_unique_labels(y_true, y_pred) if normalize not in ['true', 'pred', 'all', None]: - raise ValueError("normalize must be one of {'true', 'pred', " - "'all', None}") + msg = "normalize must be one of " \ + f"{{'true', 'pred', 'all', None}}, got {normalize}." 
+ raise ValueError(msg) use_sample_weight = bool(sample_weight is not None) dask_arrays = [y_true, y_pred, sample_weight] if use_sample_weight else \ @@ -68,7 +98,7 @@ def confusion_matrix(y_true, y_pred, # run cm computation on each partition. parts = client.sync(extract_arr_partitions, dask_arrays) - cms = [client.submit(local_cm, p, labels, use_sample_weight, + cms = [client.submit(_local_cm, p, labels, use_sample_weight, workers=[w]).result() for w, p in parts] # reduce each partition's result into one cupy matrix diff --git a/python/cuml/metrics/confusion_matrix.py b/python/cuml/metrics/confusion_matrix.py index f2e133e0b4..dbd2d8da13 100644 --- a/python/cuml/metrics/confusion_matrix.py +++ b/python/cuml/metrics/confusion_matrix.py @@ -86,8 +86,9 @@ def confusion_matrix(y_true, y_pred, sample_weight = sample_weight.to_output('cupy') if normalize not in ['true', 'pred', 'all', None]: - raise ValueError("normalize must be one of {'true', 'pred', " - "'all', None}") + msg = "normalize must be one of " \ + f"{{'true', 'pred', 'all', None}}, got {normalize}." 
+ raise ValueError(msg) y_true, _ = make_monotonic(y_true, labels, copy=True) y_pred, _ = make_monotonic(y_pred, labels, copy=True) diff --git a/python/cuml/test/dask/test_metrics.py b/python/cuml/test/dask/test_metrics.py index 8dd9538a8f..3b98025f39 100644 --- a/python/cuml/test/dask/test_metrics.py +++ b/python/cuml/test/dask/test_metrics.py @@ -62,11 +62,10 @@ def test_confusion_matrix_random(n_samples, dtype, problem_type, cluster): y_true, y_pred, np_y_true, np_y_pred = generate_random_labels( lambda rng: rng.randint(0, upper_range, n_samples).astype(dtype), - as_cupy=True, - with_numpy=True + as_cupy=True ) - y_true, y_pred = da.from_array(y_true), da.from_array(y_pred) + cm = confusion_matrix(y_true, y_pred) ref = sk_confusion_matrix(np_y_true, np_y_pred) cp.testing.assert_array_almost_equal(ref, cm, decimal=4) @@ -97,11 +96,9 @@ def test_confusion_matrix_normalize(normalize, expected_results, cluster): (2, 20)]) def test_confusion_matrix_multiclass_subset_labels(labels, cluster): client = Client(cluster) + y_true, y_pred, np_y_true, np_y_pred = generate_random_labels( - lambda rng: rng.randint(0, 3, 10).astype(np.int32), - as_cupy=True, - with_numpy=True - ) + lambda rng: rng.randint(0, 3, 10).astype(np.int32), as_cupy=True) y_true, y_pred = da.from_array(y_true), da.from_array(y_pred) ref = sk_confusion_matrix(np_y_true, np_y_pred, labels=labels) @@ -118,11 +115,9 @@ def test_confusion_matrix_multiclass_subset_labels(labels, cluster): def test_confusion_matrix_random_weights(n_samples, dtype, weights_dtype, cluster): client = Client(cluster) + y_true, y_pred, np_y_true, np_y_pred = generate_random_labels( - lambda rng: rng.randint(0, 10, n_samples).astype(dtype), - as_cupy=True, - with_numpy=True - ) + lambda rng: rng.randint(0, 10, n_samples).astype(dtype), as_cupy=True) y_true, y_pred = da.from_array(y_true), da.from_array(y_pred) if weights_dtype == 'int': diff --git a/python/cuml/test/test_metrics.py b/python/cuml/test/test_metrics.py index 
11c9705769..2380065a05 100644 --- a/python/cuml/test/test_metrics.py +++ b/python/cuml/test/test_metrics.py @@ -189,7 +189,7 @@ def test_regression_metrics_random(n_samples, dtype, function): # stress test for float32 fails because of floating point precision pytest.xfail() - y_true, y_pred = generate_random_labels( + y_true, y_pred, _, _ = generate_random_labels( lambda rng: rng.randint(0, 1000, n_samples).astype(dtype)) cuml_reg, sklearn_reg = { @@ -325,7 +325,7 @@ def test_entropy(use_handle): def test_entropy_random(n_samples, base, use_handle): handle, stream = get_handle(use_handle) - clustering, _ = \ + clustering, _, _, _ = \ generate_random_labels(lambda rng: rng.randint(0, 1000, n_samples)) # generate unormalized probabilities from clustering @@ -363,7 +363,7 @@ def test_confusion_matrix_binary(): def test_confusion_matrix_random(n_samples, dtype, problem_type): upper_range = 2 if problem_type == 'binary' else 1000 - y_true, y_pred = generate_random_labels( + y_true, y_pred, _, _ = generate_random_labels( lambda rng: rng.randint(0, upper_range, n_samples).astype(dtype)) cm = confusion_matrix(y_true, y_pred) ref = sk_confusion_matrix(y_true, y_pred) @@ -389,7 +389,7 @@ def test_confusion_matrix_normalize(normalize, expected_results): (2, 1, 4, 7), (2, 20)]) def test_confusion_matrix_multiclass_subset_labels(labels): - y_true, y_pred = generate_random_labels( + y_true, y_pred, _, _ = generate_random_labels( lambda rng: rng.randint(0, 3, 10).astype(np.int32)) ref = sk_confusion_matrix(y_true, y_pred, labels=labels) @@ -402,7 +402,7 @@ def test_confusion_matrix_multiclass_subset_labels(labels): @pytest.mark.parametrize('dtype', [np.int32, np.int64]) @pytest.mark.parametrize('weights_dtype', ['int', 'float']) def test_confusion_matrix_random_weights(n_samples, dtype, weights_dtype): - y_true, y_pred = generate_random_labels( + y_true, y_pred, _, _ = generate_random_labels( lambda rng: rng.randint(0, 10, n_samples).astype(dtype)) if weights_dtype == 'int': diff 
--git a/python/cuml/test/utils.py b/python/cuml/test/utils.py index a08a65b820..464d842a3a 100644 --- a/python/cuml/test/utils.py +++ b/python/cuml/test/utils.py @@ -216,19 +216,39 @@ def get_classes_from_package(package): return {k: v for dictionary in classes for k, v in dictionary.items()} -def generate_random_labels(random_generation_lambda, seed=1234, as_cupy=False, - with_numpy=False): - # TODO: Add documentation +def generate_random_labels(random_generation_lambda, seed=1234, as_cupy=False): + """ + Generates random labels to act as ground_truth and predictions for tests. + + Parameters + ---------- + random_generation_lambda : lambda function [numpy.random] -> ndarray + A lambda function used to generate labels for either y_true or y_pred + using a seeded numpy.random object. + seed : int + Seed for the numpy.random object. + as_cupy : bool + Choose return type of y_true and y_pred. + True: returns Cupy ndarray + False: returns Numba cuda DeviceNDArray + + Returns + ------- + y_true, y_pred, np_y_true, np_y_pred : tuple + y_true : Numba cuda DeviceNDArray or Cupy ndarray + Random target values. + y_pred : Numba cuda DeviceNDArray or Cupy ndarray + Random predictions. + np_y_true : Numpy ndarray + Same as y_true but as a numpy ndarray. + np_y_pred : Numpy ndarray + Same as y_pred but as a numpy ndarray. 
+ """ rng = np.random.RandomState(seed) # makes it reproducible a = random_generation_lambda(rng) b = random_generation_lambda(rng) if as_cupy: - a_res, b_res = cp.array(a), cp.array(b) - else: - a_res, b_res = cuda.to_device(a), cuda.to_device(b) - - if with_numpy: - return a_res, b_res, a, b + return cp.array(a), cp.array(b), a, b else: - return a_res, b_res + return cuda.to_device(a), cuda.to_device(b), a, b From 40cc3f7b469d9e8c57c49563a3c994034b7106b6 Mon Sep 17 00:00:00 2001 From: Louis Sugy Date: Mon, 16 Mar 2020 18:27:10 +0100 Subject: [PATCH 015/330] Use cuML arrays in ARIMA --- notebooks/arima_demo.ipynb | 7 +- python/cuml/test/test_arima.py | 6 +- python/cuml/tsa/arima.pyx | 160 +++++++++++++++------------------ 3 files changed, 81 insertions(+), 92 deletions(-) diff --git a/notebooks/arima_demo.ipynb b/notebooks/arima_demo.ipynb index 7a8a6a6f91..84c2e3837e 100644 --- a/notebooks/arima_demo.ipynb +++ b/notebooks/arima_demo.ipynb @@ -108,6 +108,7 @@ " \n", " # Range for the prediction\n", " if pred is not None:\n", + " pred_np = pred.to_output('numpy')\n", " pred_start = pred_start or n_obs\n", " pred_end = pred_start + pred.shape[0]\n", " \n", @@ -116,7 +117,7 @@ " title = y.columns[i]\n", " ax[i].plot(np.r_[:n_obs], y[title].to_array())\n", " if pred is not None:\n", - " ax[i].plot(np.r_[pred_start:pred_end], pred[:, i],\n", + " ax[i].plot(np.r_[pred_start:pred_end], pred_np[:, i],\n", " linestyle=\"--\")\n", " ax[i].title.set_text(title)\n", " for i in range(batch_size, r*c):\n", @@ -192,7 +193,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "*Note:* the returned array is a device array. You can convert it to a numpy array with the `copy_to_host` method:" + "*Note:* the returned array is a cuML array. 
You can convert it to other types with the `to_output` method:" ] }, { @@ -202,7 +203,7 @@ "outputs": [], "source": [ "print(type(fc_mig))\n", - "print(type(fc_mig.copy_to_host()))" + "print(type(fc_mig.to_output('numpy')))" ] }, { diff --git a/python/cuml/test/test_arima.py b/python/cuml/test/test_arima.py index 9bb9fd33ab..ff8cf9bf00 100644 --- a/python/cuml/test/test_arima.py +++ b/python/cuml/test/test_arima.py @@ -244,7 +244,7 @@ def test_integration(test_case, dtype): cuml_model.fit() # Predict - cuml_pred = cuml_model.predict(data.start, data.end).copy_to_host() + cuml_pred = cuml_model.predict(data.start, data.end).to_output('numpy') ref_preds = np.zeros((data.end - data.start, data.batch_size)) for i in range(data.batch_size): ref_preds[:, i] = ref_fits[i].get_prediction( @@ -299,9 +299,9 @@ def _predict_common(test_case, dtype, start, end, num_steps=None): ref_preds[:, i] = ref_fits[i].get_prediction( start, end - 1).predicted_mean if num_steps is None: - cuml_pred = cuml_model.predict(start, end).copy_to_host() + cuml_pred = cuml_model.predict(start, end).to_output('numpy') else: - cuml_pred = cuml_model.forecast(num_steps).copy_to_host() + cuml_pred = cuml_model.forecast(num_steps).to_output('numpy') # Compare results np.testing.assert_allclose(cuml_pred, ref_preds, rtol=0.001, atol=0.01) diff --git a/python/cuml/tsa/arima.pyx b/python/cuml/tsa/arima.pyx index 79d2de1d26..4923a2c5ab 100644 --- a/python/cuml/tsa/arima.pyx +++ b/python/cuml/tsa/arima.pyx @@ -24,29 +24,22 @@ import cupy as cp import sys import ctypes - -from cuml.tsa.batched_lbfgs import batched_fmin_lbfgs_b -import rmm - -import cuml -from cuml.utils.input_utils import input_to_dev_array, input_to_host_array -from cuml.utils.input_utils import get_dev_array_ptr - +from libc.stdint cimport uintptr_t +from libcpp cimport bool +from libcpp.vector cimport vector from typing import List, Tuple, Dict, Mapping, Optional, Union -import cudf -from cuml.utils import get_dev_array_ptr, zeros -from 
cuml.common.cuda import nvtx_range_wrap +import cudf +import cuml +import rmm +from cuml.common.array import CumlArray as cumlArray from cuml.common.base import Base -from cuml.utils import rmm_cupy_ary, has_scipy - -from libc.stdint cimport uintptr_t -from libcpp.string cimport string -from libcpp cimport bool -from libc.stdlib cimport malloc, free +from cuml.common.cuda import nvtx_range_wrap from cuml.common.handle cimport cumlHandle -from libcpp.vector cimport vector +from cuml.tsa.batched_lbfgs import batched_fmin_lbfgs_b +from cuml.utils import has_scipy +from cuml.utils.input_utils import input_to_cuml_array, input_to_host_array cdef extern from "cuml/tsa/arima_common.h" namespace "ML": @@ -131,7 +124,7 @@ class ARIMA(Base): model.fit() # Forecast - fc = model.forecast(10).copy_to_host() + fc = model.forecast(10).to_output('numpy') print(fc) Output: @@ -251,8 +244,8 @@ class ARIMA(Base): raise ValueError("ERROR: Invalid order. Required: p,q,P,Q <= 4") # Get device array. Float64 only for now. 
- self.d_y, _, self.n_obs, self.batch_size, self.dtype \ - = input_to_dev_array(y, check_dtype=np.float64) + self.d_y, self.n_obs, self.batch_size, self.dtype \ + = input_to_cuml_array(y, check_dtype=np.float64) if self.n_obs < d + s * D + 1: raise ValueError("ERROR: Number of observations too small for the" @@ -284,22 +277,22 @@ class ARIMA(Base): cdef uintptr_t d_sma_ptr = NULL cdef uintptr_t d_sigma2_ptr = NULL if order.k: - d_mu, d_mu_ptr, _, _, _ = \ - input_to_dev_array(self.mu, check_dtype=np.float64) + d_mu, *_ = input_to_cuml_array(self.mu, check_dtype=np.float64) + d_mu_ptr = d_mu.ptr if order.p: - d_ar, d_ar_ptr, _, _, _ = \ - input_to_dev_array(self.ar, check_dtype=np.float64) + d_ar, *_ = input_to_cuml_array(self.ar, check_dtype=np.float64) + d_ar_ptr = d_ar.ptr if order.q: - d_ma, d_ma_ptr, _, _, _ = \ - input_to_dev_array(self.ma, check_dtype=np.float64) + d_ma, *_ = input_to_cuml_array(self.ma, check_dtype=np.float64) + d_ma_ptr = d_ma.ptr if order.P: - d_sar, d_sar_ptr, _, _, _ = \ - input_to_dev_array(self.sar, check_dtype=np.float64) + d_sar, *_ = input_to_cuml_array(self.sar, check_dtype=np.float64) + d_sar_ptr = d_sar.ptr if order.Q: - d_sma, d_sma_ptr, _, _, _ = \ - input_to_dev_array(self.sma, check_dtype=np.float64) - d_sigma2, d_sigma2_ptr, _, _, _ = \ - input_to_dev_array(self.sigma2, check_dtype=np.float64) + d_sma, *_ = input_to_cuml_array(self.sma, check_dtype=np.float64) + d_sma_ptr = d_sma.ptr + d_sigma2, *_ = input_to_cuml_array(self.sigma2, check_dtype=np.float64) + d_sigma2_ptr = d_sigma2.ptr cdef ARIMAParams[double] cpp_params cpp_params.mu = d_mu_ptr @@ -311,7 +304,7 @@ class ARIMA(Base): cdef vector[double] ic ic.resize(self.batch_size) - cdef uintptr_t d_y_ptr = get_dev_array_ptr(self.d_y) + cdef uintptr_t d_y_ptr = self.d_y.ptr ic_name_to_number = {"aic": 0, "aicc": 1, "bic": 2} cdef int ic_type_id @@ -409,7 +402,7 @@ class ARIMA(Base): ... 
model = ARIMA(ys, (1,1,1)) model.fit() - y_pred = model.predict().copy_to_host() + y_pred = model.predict().to_output('numpy') """ cdef ARIMAOrder order = self.order @@ -437,22 +430,22 @@ class ARIMA(Base): cdef uintptr_t d_sma_ptr = NULL cdef uintptr_t d_sigma2_ptr = NULL if order.k: - d_mu, d_mu_ptr, _, _, _ = \ - input_to_dev_array(self.mu, check_dtype=np.float64) + d_mu, *_ = input_to_cuml_array(self.mu, check_dtype=np.float64) + d_mu_ptr = d_mu.ptr if order.p: - d_ar, d_ar_ptr, _, _, _ = \ - input_to_dev_array(self.ar, check_dtype=np.float64) + d_ar, *_ = input_to_cuml_array(self.ar, check_dtype=np.float64) + d_ar_ptr = d_ar.ptr if order.q: - d_ma, d_ma_ptr, _, _, _ = \ - input_to_dev_array(self.ma, check_dtype=np.float64) + d_ma, *_ = input_to_cuml_array(self.ma, check_dtype=np.float64) + d_ma_ptr = d_ma.ptr if order.P: - d_sar, d_sar_ptr, _, _, _ = \ - input_to_dev_array(self.sar, check_dtype=np.float64) + d_sar, *_ = input_to_cuml_array(self.sar, check_dtype=np.float64) + d_sar_ptr = d_sar.ptr if order.Q: - d_sma, d_sma_ptr, _, _, _ = \ - input_to_dev_array(self.sma, check_dtype=np.float64) - d_sigma2, d_sigma2_ptr, _, _, _ = \ - input_to_dev_array(self.sigma2, check_dtype=np.float64) + d_sma, *_ = input_to_cuml_array(self.sma, check_dtype=np.float64) + d_sma_ptr = d_sma.ptr + d_sigma2, *_ = input_to_cuml_array(self.sigma2, check_dtype=np.float64) + d_sigma2_ptr = d_sigma2.ptr cdef ARIMAParams[double] cpp_params cpp_params.mu = d_mu_ptr @@ -468,14 +461,14 @@ class ARIMA(Base): # pointers cdef uintptr_t d_vs_ptr cdef uintptr_t d_y_p_ptr - d_vs = rmm.device_array((self.n_obs - order.d - order.D * order.s, - self.batch_size), dtype=np.float64, order="F") - d_y_p = rmm.device_array((predict_size, self.batch_size), - dtype=np.float64, order="F") - d_vs_ptr = get_dev_array_ptr(d_vs) - d_y_p_ptr = get_dev_array_ptr(d_y_p) + d_vs = cumlArray.empty((self.n_obs - order.d - order.D * order.s, + self.batch_size), dtype=np.float64, order="F") + d_y_p = 
cumlArray.empty((predict_size, self.batch_size), + dtype=np.float64, order="F") + d_vs_ptr = d_vs.ptr + d_y_p_ptr = d_y_p.ptr - cdef uintptr_t d_y_ptr = get_dev_array_ptr(self.d_y) + cdef uintptr_t d_y_ptr = self.d_y.ptr cpp_predict(handle_[0], d_y_ptr, self.batch_size, self.n_obs, start, end, order, @@ -516,7 +509,7 @@ class ARIMA(Base): """ cdef ARIMAOrder order = self.order - cdef uintptr_t d_y_ptr = get_dev_array_ptr(self.d_y) + cdef uintptr_t d_y_ptr = self.d_y.ptr cdef cumlHandle* handle_ = self.handle.getHandle() # Create mu, ar and ma arrays @@ -527,26 +520,22 @@ class ARIMA(Base): cdef uintptr_t d_sma_ptr = NULL cdef uintptr_t d_sigma2_ptr = NULL if order.k: - d_mu = zeros(self.batch_size, dtype=self.dtype) - d_mu_ptr = get_dev_array_ptr(d_mu) + d_mu = cumlArray.zeros(self.batch_size, dtype=np.float64) + d_mu_ptr = d_mu.ptr if order.p: - d_ar = zeros((order.p, self.batch_size), dtype=self.dtype, - order='F') - d_ar_ptr = get_dev_array_ptr(d_ar) + d_ar = cumlArray.zeros((order.p, self.batch_size), dtype=np.float64, order='F') + d_ar_ptr = d_ar.ptr if order.q: - d_ma = zeros((order.q, self.batch_size), dtype=self.dtype, - order='F') - d_ma_ptr = get_dev_array_ptr(d_ma) + d_ma = cumlArray.zeros((order.q, self.batch_size), dtype=np.float64, order='F') + d_ma_ptr = d_ma.ptr if order.P: - d_sar = zeros((order.P, self.batch_size), dtype=self.dtype, - order='F') - d_sar_ptr = get_dev_array_ptr(d_sar) + d_sar = cumlArray.zeros((order.P, self.batch_size), dtype=np.float64, order='F') + d_sar_ptr = d_sar.ptr if order.Q: - d_sma = zeros((order.Q, self.batch_size), dtype=self.dtype, - order='F') - d_sma_ptr = get_dev_array_ptr(d_sma) - d_sigma2 = zeros(self.batch_size, dtype=self.dtype) - d_sigma2_ptr = get_dev_array_ptr(d_sigma2) + d_sma = cumlArray.zeros((order.Q, self.batch_size), dtype=np.float64, order='F') + d_sma_ptr = d_sma.ptr + d_sigma2 = cumlArray.zeros(self.batch_size, dtype=np.float64) + d_sigma2_ptr = d_sigma2.ptr cdef ARIMAParams[double] cpp_params 
cpp_params.mu = d_mu_ptr @@ -562,16 +551,16 @@ class ARIMA(Base): params = dict() if order.k: - params["mu"] = d_mu.copy_to_host() + params["mu"] = d_mu.to_output('numpy') if order.p: - params["ar"] = d_ar.copy_to_host() + params["ar"] = d_ar.to_output('numpy') if order.q: - params["ma"] = d_ma.copy_to_host() + params["ma"] = d_ma.to_output('numpy') if order.P: - params["sar"] = d_sar.copy_to_host() + params["sar"] = d_sar.to_output('numpy') if order.Q: - params["sma"] = d_sma.copy_to_host() - params["sigma2"] = d_sigma2.copy_to_host() + params["sma"] = d_sma.to_output('numpy') + params["sigma2"] = d_sigma2.to_output('numpy') self.set_params(params) @nvtx_range_wrap @@ -610,7 +599,7 @@ class ARIMA(Base): else: self.set_params(start_params) - cdef uintptr_t d_y_ptr = get_dev_array_ptr(self.d_y) + cdef uintptr_t d_y_ptr = self.d_y.ptr def f(x: np.ndarray) -> np.ndarray: """The (batched) energy functional returning the negative @@ -667,17 +656,16 @@ class ARIMA(Base): cdef ARIMAOrder order = self.order - cdef uintptr_t d_x_ptr - d_x_array, d_x_ptr, _, _, _ = \ - input_to_dev_array(x, check_dtype=np.float64, order='C') + d_x_array, *_ = \ + input_to_cuml_array(x, check_dtype=np.float64, order='C') + cdef uintptr_t d_x_ptr = d_x_array.ptr - cdef uintptr_t d_y_ptr = get_dev_array_ptr(self.d_y) + cdef uintptr_t d_y_ptr = self.d_y.ptr cdef cumlHandle* handle_ = self.handle.getHandle() - d_vs = rmm.device_array((self.n_obs - order.d - order.D * order.s, - self.batch_size), - dtype=np.float64, order="F") - cdef uintptr_t d_vs_ptr = get_dev_array_ptr(d_vs) + d_vs = cumlArray.empty((self.n_obs - order.d - order.D * order.s, + self.batch_size), dtype=np.float64, order="F") + cdef uintptr_t d_vs_ptr = d_vs.ptr batched_loglike(handle_[0], d_y_ptr, self.batch_size, self.n_obs, order, d_x_ptr, From 1a3ae88d8d91eba57beabbf9c6ba4ebc0b951333 Mon Sep 17 00:00:00 2001 From: Louis Sugy Date: Mon, 16 Mar 2020 19:57:59 +0100 Subject: [PATCH 016/330] Python style requirements --- 
python/cuml/tsa/arima.pyx | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/python/cuml/tsa/arima.pyx b/python/cuml/tsa/arima.pyx index 4923a2c5ab..64ad39044e 100644 --- a/python/cuml/tsa/arima.pyx +++ b/python/cuml/tsa/arima.pyx @@ -523,16 +523,20 @@ class ARIMA(Base): d_mu = cumlArray.zeros(self.batch_size, dtype=np.float64) d_mu_ptr = d_mu.ptr if order.p: - d_ar = cumlArray.zeros((order.p, self.batch_size), dtype=np.float64, order='F') + d_ar = cumlArray.zeros((order.p, self.batch_size), dtype=np.float64, + order='F') d_ar_ptr = d_ar.ptr if order.q: - d_ma = cumlArray.zeros((order.q, self.batch_size), dtype=np.float64, order='F') + d_ma = cumlArray.zeros((order.q, self.batch_size), dtype=np.float64, + order='F') d_ma_ptr = d_ma.ptr if order.P: - d_sar = cumlArray.zeros((order.P, self.batch_size), dtype=np.float64, order='F') + d_sar = cumlArray.zeros((order.P, self.batch_size), dtype=np.float64, + order='F') d_sar_ptr = d_sar.ptr if order.Q: - d_sma = cumlArray.zeros((order.Q, self.batch_size), dtype=np.float64, order='F') + d_sma = cumlArray.zeros((order.Q, self.batch_size), dtype=np.float64, + order='F') d_sma_ptr = d_sma.ptr d_sigma2 = cumlArray.zeros(self.batch_size, dtype=np.float64) d_sigma2_ptr = d_sigma2.ptr From 59ebcfac9163d2529ab849ba0e262d37396d7125 Mon Sep 17 00:00:00 2001 From: Louis Sugy Date: Mon, 16 Mar 2020 20:01:12 +0100 Subject: [PATCH 017/330] Python style requirements (bis) --- python/cuml/tsa/arima.pyx | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/python/cuml/tsa/arima.pyx b/python/cuml/tsa/arima.pyx index 64ad39044e..9897fab008 100644 --- a/python/cuml/tsa/arima.pyx +++ b/python/cuml/tsa/arima.pyx @@ -523,20 +523,20 @@ class ARIMA(Base): d_mu = cumlArray.zeros(self.batch_size, dtype=np.float64) d_mu_ptr = d_mu.ptr if order.p: - d_ar = cumlArray.zeros((order.p, self.batch_size), dtype=np.float64, - order='F') + d_ar = cumlArray.zeros((order.p, self.batch_size), + 
dtype=np.float64, order='F') d_ar_ptr = d_ar.ptr if order.q: - d_ma = cumlArray.zeros((order.q, self.batch_size), dtype=np.float64, - order='F') + d_ma = cumlArray.zeros((order.q, self.batch_size), + dtype=np.float64, order='F') d_ma_ptr = d_ma.ptr if order.P: - d_sar = cumlArray.zeros((order.P, self.batch_size), dtype=np.float64, - order='F') + d_sar = cumlArray.zeros((order.P, self.batch_size), + dtype=np.float64, order='F') d_sar_ptr = d_sar.ptr if order.Q: - d_sma = cumlArray.zeros((order.Q, self.batch_size), dtype=np.float64, - order='F') + d_sma = cumlArray.zeros((order.Q, self.batch_size), + dtype=np.float64, order='F') d_sma_ptr = d_sma.ptr d_sigma2 = cumlArray.zeros(self.batch_size, dtype=np.float64) d_sigma2_ptr = d_sigma2.ptr From 735988361647c66214e619b71139b21f2491c333 Mon Sep 17 00:00:00 2001 From: Louis Sugy Date: Tue, 17 Mar 2020 11:38:02 +0100 Subject: [PATCH 018/330] Update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 31342c321b..17256f995a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -55,6 +55,7 @@ - PR #1848: Rely on subclassing for cuML Array serialization - PR #1866: Minimizing client memory pressure on Naive Bayes - PR #1788: Removing complexity bottleneck in S-ARIMA +- PR #1883: Use cuML arrays in ARIMA ## Bug Fixes - PR #1833: Fix depth issue in shallow RF regression estimators From 76154a056c6cfdbdf0fd0934bfdc4189471d8dd6 Mon Sep 17 00:00:00 2001 From: Simon Andersen Date: Wed, 18 Mar 2020 14:21:21 -0500 Subject: [PATCH 019/330] One hot encoder draft code --- python/cuml/preprocessing/__init__.py | 1 + python/cuml/preprocessing/encoders.py | 206 +++++++++++++++++++++++ python/cuml/test/test_one_hot_encoder.py | 47 ++++++ 3 files changed, 254 insertions(+) create mode 100644 python/cuml/preprocessing/encoders.py create mode 100644 python/cuml/test/test_one_hot_encoder.py diff --git a/python/cuml/preprocessing/__init__.py b/python/cuml/preprocessing/__init__.py index 
62054257ba..93a5b36e72 100644 --- a/python/cuml/preprocessing/__init__.py +++ b/python/cuml/preprocessing/__init__.py @@ -16,3 +16,4 @@ from cuml.preprocessing.model_selection import train_test_split from cuml.preprocessing.LabelEncoder import LabelEncoder from cuml.preprocessing.label import LabelBinarizer, label_binarize +from cuml.preprocessing.encoders import OneHotEncoder diff --git a/python/cuml/preprocessing/encoders.py b/python/cuml/preprocessing/encoders.py new file mode 100644 index 0000000000..067a5ae2cc --- /dev/null +++ b/python/cuml/preprocessing/encoders.py @@ -0,0 +1,206 @@ +# Copyright (c) 2020, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import numpy as np +import cupy as cp + +from cuml.preprocessing import LabelEncoder +from cudf import DataFrame, Series + +from cuml.utils import with_cupy_rmm + + +class OneHotEncoder: + """ + Encode categorical features as a one-hot numeric array. + The input to this transformer should be a cuDF.DataFrame of integers or + strings, denoting the values taken on by categorical (discrete) features. + The features are encoded using a one-hot (aka 'one-of-K' or 'dummy') + encoding scheme. This creates a binary column for each category and + returns a sparse matrix or dense array (depending on the ``sparse`` + parameter). + By default, the encoder derives the categories based on the unique values + in each feature. Alternatively, you can also specify the `categories` + manually. 
+ Note: a one-hot encoding of y labels should use a LabelBinarizer + instead. + + Parameters + ---------- + TODO: Implement categories + categories : 'auto' or a cuml.DataFrame, default='auto' + Categories (unique values) per feature: + - 'auto' : Determine categories automatically from the training data. + - DataFrame : ``categories[col]`` holds the categories expected in the + feature col. The passed categories should not mix strings and numeric + values within a single feature, and should be sorted in case of + numeric values. TODO: Check sorted for numeric + The used categories can be found in the ``categories_`` attribute. + TODO: Implement drop + drop : 'first' or a cuml.DataFrame, default=None + Specifies a methodology to use to drop one of the categories per + feature. This is useful in situations where perfectly collinear + features cause problems, such as when feeding the resulting data + into a neural network or an unregularized regression. + - None : retain all features (the default). + - 'first' : drop the first category in each feature. If only one + category is present, the feature will be dropped entirely. + - DataFrame : ``drop[col]`` is the category in feature col that + should be dropped. + # sparse : bool, default=True + # Will return sparse matrix if set True else will return an array. + dtype : number type, default=np.float + Desired dtype of output. + handle_unknown : {'error', 'ignore'}, default='error' + Whether to raise an error or ignore if an unknown categorical feature + is present during transform (default is to raise). When this parameter + is set to 'ignore' and an unknown category is encountered during + transform, the resulting one-hot encoded columns for this feature + will be all zeros. In the inverse transform, an unknown category + will be denoted as None. 
+ + Attributes + ---------- + categories_ : list of arrays + The categories of each feature determined during fitting + (in order of the features in X and corresponding with the output + of ``transform``). This includes the category specified in ``drop`` + (if any). + drop_idx_ : array of shape (n_features,) + ``drop_idx_[i]`` is the index in ``categories_[i]`` of the category to + be dropped for each feature. None if all the transformed features will + be retained. + + """ + def __init__(self, categories='auto', drop=None, sparse=True, + dtype=np.float64, handle_unknown='error'): + self.categories = categories + self.sparse = sparse + self.dtype = dtype + self.handle_unknown = handle_unknown + self.drop = drop + self._fitted = False + self.categories_ = None + self.drop_idx_ = None + self._encoders = None + + def _validate_keywords(self): + if self.handle_unknown not in ('error', 'ignore'): + msg = ("handle_unknown should be either 'error' or 'ignore', " + "got {0}.".format(self.handle_unknown)) + raise ValueError(msg) + # If we have both dropped columns and ignored unknown + # values, there will be ambiguous cells. This creates difficulties + # in interpreting the model. + if self.drop is not None and self.handle_unknown != 'error': + raise ValueError( + "`handle_unknown` must be 'error' when the drop parameter is " + "specified, as both would create categories that are all " + "zero.") + + def fit(self, X): + """ + Fit OneHotEncoder to X. + Parameters + ---------- + X : cuDF.DataFrame + The data to determine the categories of each feature. 
+ Returns + ------- + self + """ + self._validate_keywords() + if self.categories == 'auto': + self._encoders = {feature: LabelEncoder().fit(X[feature]) + for feature in X.columns} + else: + raise NotImplementedError + # def filtered_label_encoder(feature): + # filtered = X[feature].fil + # self._encoders = {feature: filtered_label_encoder(feature) + # for feature in X.columns} + # self._fit(X, handle_unknown=self.handle_unknown) + # self.drop_idx_ = self._compute_drop_idx() + self._fitted = True + return self + + def fit_transform(self, X): + """ + Fit OneHotEncoder to X, then transform X. + Equivalent to fit(X).transform(X). + + Parameters + ---------- + X : cudf.DataFrame + The data to encode. + Returns + ------- + X_out : sparse matrix if sparse=True else a 2-d array + Transformed input. + """ + return self.fit(X).transform(X) + + @staticmethod + @with_cupy_rmm + def _one_hot_encoding(encoder, X): + idx = encoder.transform(X).to_array() + ohe = cp.zeros((len(X), len(encoder.classes_))) + ohe[cp.arange(len(ohe)), idx] = 1 + return ohe + + @with_cupy_rmm + def transform(self, X): + """ + Transform X using one-hot encoding. + Parameters + ---------- + X : cudf.DataFrame + The data to encode. + Returns + ------- + X_out : sparse matrix if sparse=True else a 2-d array + Transformed input. + """ + if not self._fitted: + raise RuntimeError("Model must first be .fit()") + onehots = [self._one_hot_encoding(self._encoders[feature], X[feature]) + for feature in X.columns] + return cp.concatenate(onehots, axis=1) + + @with_cupy_rmm + def inverse_transform(self, X): + """ + Convert the data back to the original representation. + In case unknown categories are encountered (all zeros in the + one-hot encoding), ``None`` is used to represent this category. + Parameters + ---------- + X : array-like or sparse matrix, shape [n_samples, n_encoded_features] + The transformed data. + Returns + ------- + X_tr : cudf.DataFrame + Inverse transformed array. 
+ """ + if not self._fitted: + raise RuntimeError("Model must first be .fit()") + result = DataFrame(columns=self._encoders.keys()) + j = 0 + for feature in self._encoders.keys(): + enc_size = len(self._encoders[feature].classes_) + x_feature = cp.argmax(X[:, j:j + enc_size], axis=1) + inv = self._encoders[feature].inverse_transform(Series(x_feature)) + result[feature] = inv + j += enc_size + return result diff --git a/python/cuml/test/test_one_hot_encoder.py b/python/cuml/test/test_one_hot_encoder.py new file mode 100644 index 0000000000..08f3c76d01 --- /dev/null +++ b/python/cuml/test/test_one_hot_encoder.py @@ -0,0 +1,47 @@ +# Copyright (c) 2020, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from cudf import DataFrame +from cuml.preprocessing import OneHotEncoder + +import cupy as cp + +from sklearn.preprocessing import OneHotEncoder as SkOneHotEncoder + + +def _from_df_to_array(df): + return list(zip(*[df[feature] for feature in df.columns])) + + +def test_onehot_vs_skonehot(): + X = DataFrame({'gender': ['Male', 'Female', 'Female'], 'int': [1, 3, 2]}) + skX = _from_df_to_array(X) + + enc = OneHotEncoder(sparse=False) + skohe = SkOneHotEncoder(sparse=False) + + ohe = enc.fit_transform(X) + ref = skohe.fit_transform(skX) + + cp.testing.assert_array_equal(ohe, ref) + + +def test_onehot_inverse_transform(): + X = DataFrame({'gender': ['Male', 'Female', 'Female'], 'int': [1, 3, 2]}) + + enc = OneHotEncoder() + ohe = enc.fit_transform(X) + inv = enc.inverse_transform(ohe) + + assert X.equals(inv) From c247afc7146c86c53fa94b73d9c0dfa0e287d1e5 Mon Sep 17 00:00:00 2001 From: Simon Andersen Date: Wed, 18 Mar 2020 14:33:04 -0500 Subject: [PATCH 020/330] Update CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5cc8e6cae8..8bf0e9bea2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ - PR #1766: Mean absolute error implementation with cupy - PR #1766: Mean squared log error implementation with cupy - PR #1635: cuML Array shim and configurable output added to cluster methods +- PR #1892: One hot encoder implementation with cupy - PR #1586: Seasonal ARIMA - PR #1683: cuml.dask make_regression - PR #1689: Add framework for cuML Dask serializers From ae730114a27cce4bf799500a898686c1e70f8583 Mon Sep 17 00:00:00 2001 From: Simon Andersen Date: Wed, 18 Mar 2020 19:07:14 -0500 Subject: [PATCH 021/330] Add support for categories feature of OneHotEncoder as well as handling_unknown --- python/cuml/preprocessing/encoders.py | 32 ++++++++++-------------- python/cuml/test/test_one_hot_encoder.py | 24 +++++++++++++++++- 2 files changed, 36 insertions(+), 20 deletions(-) diff --git 
a/python/cuml/preprocessing/encoders.py b/python/cuml/preprocessing/encoders.py index 067a5ae2cc..98be1949f2 100644 --- a/python/cuml/preprocessing/encoders.py +++ b/python/cuml/preprocessing/encoders.py @@ -38,15 +38,11 @@ class OneHotEncoder: Parameters ---------- - TODO: Implement categories categories : 'auto' or a cuml.DataFrame, default='auto' Categories (unique values) per feature: - 'auto' : Determine categories automatically from the training data. - DataFrame : ``categories[col]`` holds the categories expected in the - feature col. The passed categories should not mix strings and numeric - values within a single feature, and should be sorted in case of - numeric values. TODO: Check sorted for numeric - The used categories can be found in the ``categories_`` attribute. + feature col. TODO: Implement drop drop : 'first' or a cuml.DataFrame, default=None Specifies a methodology to use to drop one of the categories per @@ -60,6 +56,7 @@ class OneHotEncoder: should be dropped. # sparse : bool, default=True # Will return sparse matrix if set True else will return an array. + TODO: Implement dtype dtype : number type, default=np.float Desired dtype of output. handle_unknown : {'error', 'ignore'}, default='error' @@ -72,16 +69,10 @@ class OneHotEncoder: Attributes ---------- - categories_ : list of arrays - The categories of each feature determined during fitting - (in order of the features in X and corresponding with the output - of ``transform``). This includes the category specified in ``drop`` - (if any). drop_idx_ : array of shape (n_features,) ``drop_idx_[i]`` is the index in ``categories_[i]`` of the category to be dropped for each feature. None if all the transformed features will be retained. 
- """ def __init__(self, categories='auto', drop=None, sparse=True, dtype=np.float64, handle_unknown='error'): @@ -91,7 +82,6 @@ def __init__(self, categories='auto', drop=None, sparse=True, self.handle_unknown = handle_unknown self.drop = drop self._fitted = False - self.categories_ = None self.drop_idx_ = None self._encoders = None @@ -121,16 +111,20 @@ def fit(self, X): self """ self._validate_keywords() - if self.categories == 'auto': + if type(self.categories) is str and self.categories == 'auto': self._encoders = {feature: LabelEncoder().fit(X[feature]) for feature in X.columns} else: - raise NotImplementedError - # def filtered_label_encoder(feature): - # filtered = X[feature].fil - # self._encoders = {feature: filtered_label_encoder(feature) - # for feature in X.columns} - # self._fit(X, handle_unknown=self.handle_unknown) + self._encoders = dict() + for feature in self.categories.columns: + le = LabelEncoder().fit(self.categories[feature]) + self._encoders[feature] = le + if self.handle_unknown == 'error': + if not X[feature].isin(self.categories[feature]).all(): + msg = ("Found unknown categories in column {0}" + " during fit".format(feature)) + raise ValueError(msg) + # self.drop_idx_ = self._compute_drop_idx() self._fitted = True return self diff --git a/python/cuml/test/test_one_hot_encoder.py b/python/cuml/test/test_one_hot_encoder.py index 08f3c76d01..78666389a2 100644 --- a/python/cuml/test/test_one_hot_encoder.py +++ b/python/cuml/test/test_one_hot_encoder.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
- +import pytest from cudf import DataFrame from cuml.preprocessing import OneHotEncoder @@ -45,3 +45,25 @@ def test_onehot_inverse_transform(): inv = enc.inverse_transform(ohe) assert X.equals(inv) + + +def test_onehot_categories(): + X = DataFrame({'chars': ['a', 'b'], 'int': [0, 2]}) + enc = OneHotEncoder( + categories=DataFrame({'chars': ['a', 'b', 'c'], 'int': [0, 1, 2]})) + ref = cp.array([[1., 0., 0., 1., 0., 0.], + [0., 1., 0., 0., 0., 1.]]) + res = enc.fit_transform(X) + cp.testing.assert_array_equal(res, ref) + + +def test_onehot_fit_handle_unknown(): + X = DataFrame({'chars': ['a', 'b'], 'int': [0, 2]}) + Y = DataFrame({'chars': ['c', 'b'], 'int': [0, 2]}) + + enc = OneHotEncoder(handle_unknown='error', categories=Y) + with pytest.raises(ValueError): + enc.fit(X) + + enc = OneHotEncoder(handle_unknown='ignore', categories=Y) + enc.fit(X) From c4cda84adbc5db0c4cff7ab19eddd30f8f091e84 Mon Sep 17 00:00:00 2001 From: Louis Sugy Date: Thu, 19 Mar 2020 11:55:07 +0100 Subject: [PATCH 022/330] Clean unused imports --- python/cuml/tsa/arima.pyx | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/python/cuml/tsa/arima.pyx b/python/cuml/tsa/arima.pyx index 9897fab008..9c65cc2acb 100644 --- a/python/cuml/tsa/arima.pyx +++ b/python/cuml/tsa/arima.pyx @@ -20,7 +20,6 @@ # cython: language_level = 3 import numpy as np -import cupy as cp import sys import ctypes @@ -29,10 +28,6 @@ from libcpp cimport bool from libcpp.vector cimport vector from typing import List, Tuple, Dict, Mapping, Optional, Union -import cudf -import cuml -import rmm - from cuml.common.array import CumlArray as cumlArray from cuml.common.base import Base from cuml.common.cuda import nvtx_range_wrap @@ -107,7 +102,7 @@ class ARIMA(Base): .. 
code-block:: python import numpy as np - from cuml.tsa import arima + from cuml.tsa.arima import ARIMA # Create seasonal data with a trend, a seasonal pattern and noise n_obs = 100 @@ -120,7 +115,7 @@ class ARIMA(Base): + np.tile(pattern, (25, 1))) # Fit a seasonal ARIMA model - model = arima.ARIMA(y, (0,1,1), (0,1,1,4), fit_intercept=False) + model = ARIMA(y, (0,1,1), (0,1,1,4), fit_intercept=False) model.fit() # Forecast @@ -398,7 +393,7 @@ class ARIMA(Base): Example: -------- .. code-block:: python - from cuml.tsa.arima import fit + from cuml.tsa.arima import ARIMA ... model = ARIMA(ys, (1,1,1)) model.fit() @@ -493,8 +488,7 @@ class ARIMA(Base): Example: -------- .. code-block:: python - from cuml.tsa.arima import fit - import cuml + from cuml.tsa.arima import ARIMA ... model = ARIMA(ys, (1,1,1)) model.fit() From c56e196842eb18246e3788eb818a90bb3d10cfd8 Mon Sep 17 00:00:00 2001 From: Simon Andersen Date: Thu, 19 Mar 2020 15:45:40 -0500 Subject: [PATCH 023/330] Add full support for unknown values in OneHotEncoder --- python/cuml/preprocessing/LabelEncoder.py | 27 +++++++++++++---- python/cuml/preprocessing/encoders.py | 37 +++++++++++++++-------- 2 files changed, 46 insertions(+), 18 deletions(-) diff --git a/python/cuml/preprocessing/LabelEncoder.py b/python/cuml/preprocessing/LabelEncoder.py index e18f03510e..2aafc1d55f 100644 --- a/python/cuml/preprocessing/LabelEncoder.py +++ b/python/cuml/preprocessing/LabelEncoder.py @@ -24,6 +24,14 @@ class LabelEncoder(object): """ An nvcategory based implementation of ordinal label encoding + Parameters + ---------- + handle_unknown : {'error', 'ignore'}, default='error' + Whether to raise an error or ignore if an unknown categorical feature + is present during transform (default is to raise). When this parameter + is set to 'ignore' and an unknown category is encountered during + transform or inverse transform, the resulting encoding will be null. 
+ Examples -------- Converting a categorical implementation to a numerical one @@ -98,10 +106,16 @@ class LabelEncoder(object): """ - def __init__(self): + def __init__(self, handle_unknown='error'): self.classes_ = None self.dtype = None self._fitted: bool = False + self.handle_unknown = handle_unknown + + if self.handle_unknown not in ('error', 'ignore'): + msg = ("handle_unknown should be either 'error' or 'ignore', " + "got {0}.".format(self.handle_unknown)) + raise ValueError(msg) def _check_is_fitted(self): if not self._fitted: @@ -165,7 +179,7 @@ def transform(self, y: cudf.Series) -> cudf.Series: encoded = cudf.Series(encoded) - if encoded.has_nulls: + if encoded.has_nulls and self.handle_unknown == 'error': raise KeyError("Attempted to encode unseen key") return cudf.Series(encoded) @@ -210,10 +224,11 @@ def inverse_transform(self, y: cudf.Series) -> cudf.Series: # check if ord_label out of bound ord_label = y.unique() category_num = len(self.classes_) - for ordi in ord_label: - if ordi < 0 or ordi >= category_num: - raise ValueError( - 'y contains previously unseen label {}'.format(ordi)) + if self.handle_unknown == 'error': + for ordi in ord_label: + if ordi < 0 or ordi >= category_num: + raise ValueError( + 'y contains previously unseen label {}'.format(ordi)) y = y.astype(self.dtype) diff --git a/python/cuml/preprocessing/encoders.py b/python/cuml/preprocessing/encoders.py index 98be1949f2..7e4795a888 100644 --- a/python/cuml/preprocessing/encoders.py +++ b/python/cuml/preprocessing/encoders.py @@ -99,6 +99,10 @@ def _validate_keywords(self): "specified, as both would create categories that are all " "zero.") + def _check_is_fitted(self): + if not self._fitted: + raise RuntimeError("Model must first be .fit()") + def fit(self, X): """ Fit OneHotEncoder to X. 
@@ -112,13 +116,16 @@ def fit(self, X): """ self._validate_keywords() if type(self.categories) is str and self.categories == 'auto': - self._encoders = {feature: LabelEncoder().fit(X[feature]) - for feature in X.columns} + self._encoders = { + feature: LabelEncoder(handle_unknown=self.handle_unknown).fit( + X[feature]) + for feature in X.columns + } else: self._encoders = dict() for feature in self.categories.columns: - le = LabelEncoder().fit(self.categories[feature]) - self._encoders[feature] = le + le = LabelEncoder(handle_unknown=self.handle_unknown) + self._encoders[feature] = le.fit(self.categories[feature]) if self.handle_unknown == 'error': if not X[feature].isin(self.categories[feature]).all(): msg = ("Found unknown categories in column {0}" @@ -148,9 +155,14 @@ def fit_transform(self, X): @staticmethod @with_cupy_rmm def _one_hot_encoding(encoder, X): - idx = encoder.transform(X).to_array() + col_idx = encoder.transform(X).to_gpu_array(fillna="pandas") + col_idx = cp.asarray(col_idx) ohe = cp.zeros((len(X), len(encoder.classes_))) - ohe[cp.arange(len(ohe)), idx] = 1 + # Filter out rows with null values + idx_to_keep = col_idx > -1 + row_idx = cp.arange(len(ohe))[idx_to_keep] + col_idx = col_idx[idx_to_keep] + ohe[row_idx, col_idx] = 1 return ohe @with_cupy_rmm @@ -166,8 +178,7 @@ def transform(self, X): X_out : sparse matrix if sparse=True else a 2-d array Transformed input. """ - if not self._fitted: - raise RuntimeError("Model must first be .fit()") + self._check_is_fitted() onehots = [self._one_hot_encoding(self._encoders[feature], X[feature]) for feature in X.columns] return cp.concatenate(onehots, axis=1) @@ -187,14 +198,16 @@ def inverse_transform(self, X): X_tr : cudf.DataFrame Inverse transformed array. 
""" - if not self._fitted: - raise RuntimeError("Model must first be .fit()") + self._check_is_fitted() result = DataFrame(columns=self._encoders.keys()) j = 0 for feature in self._encoders.keys(): enc_size = len(self._encoders[feature].classes_) - x_feature = cp.argmax(X[:, j:j + enc_size], axis=1) - inv = self._encoders[feature].inverse_transform(Series(x_feature)) + x_feature = X[:, j:j + enc_size] + not_null_idx = x_feature.any(axis=1) + idx = cp.argmax(x_feature, axis=1) + inv = self._encoders[feature].inverse_transform(Series(idx)) + inv.iloc[~not_null_idx] = None result[feature] = inv j += enc_size return result From 217b12589236558a8f231383018d79743113532a Mon Sep 17 00:00:00 2001 From: Levs Dolgovs Date: Wed, 19 Feb 2020 19:35:11 -0800 Subject: [PATCH 024/330] float* class_probs --- cpp/src/fil/common.cuh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cpp/src/fil/common.cuh b/cpp/src/fil/common.cuh index 1eb243f4be..57eedba2fe 100644 --- a/cpp/src/fil/common.cuh +++ b/cpp/src/fil/common.cuh @@ -79,6 +79,7 @@ struct dense_tree { return nodes_[i * node_pitch_]; } dense_node* nodes_ = nullptr; + float* class_probs_ = nullptr; int node_pitch_ = 0; }; @@ -95,6 +96,7 @@ struct dense_storage { return dense_tree(nodes_ + i * tree_stride_, node_pitch_); } dense_node* nodes_ = nullptr; + float* class_probs_ = nullptr; int num_trees_ = 0; int tree_stride_ = 0; int node_pitch_ = 0; From 12a31b7698259fb50c9c992c9189409e1ba4caac Mon Sep 17 00:00:00 2001 From: Levs Dolgovs Date: Thu, 20 Feb 2020 17:45:52 -0800 Subject: [PATCH 025/330] threaded proper payload type handling to try out different algorithms --- cpp/include/cuml/fil/fil.h | 10 ++++ cpp/src/fil/common.cuh | 25 ++++++-- cpp/src/fil/fil.cu | 44 +++++++++----- cpp/src/fil/infer.cu | 115 +++++++++++++++++++++++++++---------- python/cuml/fil/fil.pyx | 1 + 5 files changed, 146 insertions(+), 49 deletions(-) diff --git a/cpp/include/cuml/fil/fil.h b/cpp/include/cuml/fil/fil.h index bf4a96d7c8..d1ab3601f9 100644 
--- a/cpp/include/cuml/fil/fil.h +++ b/cpp/include/cuml/fil/fil.h @@ -121,6 +121,12 @@ struct forest; /** forest_t is the predictor handle */ typedef forest* forest_t; +enum leaf_value_t { + FLOAT_SCALAR, + INT_CLASS_LABEL, + FLOAT_VECTOR +}; + /** forest_params_t are the trees to initialize the predictor */ struct forest_params_t { // total number of nodes; ignored for dense forests @@ -131,6 +137,8 @@ struct forest_params_t { int num_trees; // num_cols is the number of columns in the data int num_cols; + // TODO doc + leaf_value_t leaf_payload_type; // algo is the inference algorithm; // sparse forests do not distinguish between NAIVE and TREE_REORG algo_t algo; @@ -156,6 +164,8 @@ struct treelite_params_t { float threshold; // storage_type indicates whether the forest should be imported as dense or sparse storage_type_t storage_type; + // TODO doc + leaf_value_t leaf_payload_type; }; /** init_dense uses params and nodes to initialize the dense forest stored in pf diff --git a/cpp/src/fil/common.cuh b/cpp/src/fil/common.cuh index 57eedba2fe..84fd2e036a 100644 --- a/cpp/src/fil/common.cuh +++ b/cpp/src/fil/common.cuh @@ -46,17 +46,28 @@ struct base_node { static const int FID_MASK = (1 << 30) - 1; static const int DEF_LEFT_MASK = 1 << 30; static const int IS_LEAF_MASK = 1 << 31; - float val; + union { + /// threshold value for branch node or output value (e.g. 
class + /// probability or regression summand) for leaf node + float f; + unsigned int idx; + ///< class label or index of the float vector + ///< vector can be used for class probabilities or regression + } val; int bits; - __host__ __device__ float output() const { return val; } - __host__ __device__ float thresh() const { return val; } + template __host__ __device__ Tval output() const; + template<> __host__ __device__ + float output() const { return val.f; } + template<> __host__ __device__ + unsigned int output() const { return val.idx; } + __host__ __device__ float thresh() const { return val.f; } __host__ __device__ int fid() const { return bits & FID_MASK; } __host__ __device__ bool def_left() const { return bits & DEF_LEFT_MASK; } __host__ __device__ bool is_leaf() const { return bits & IS_LEAF_MASK; } - __host__ __device__ base_node() : val(0.0f), bits(0) {} - base_node(dense_node_t node) : val(node.val), bits(node.bits) {} + __host__ __device__ base_node() : val.f(0.0f), bits(0) {} + base_node(dense_node_t node) : val.f(node.val), bits(node.bits) {} base_node(float output, float thresh, int fid, bool def_left, bool is_leaf) - : val(is_leaf ? output : thresh), + : val.f(is_leaf ? output : thresh), bits((fid & FID_MASK) | (def_left ? DEF_LEFT_MASK : 0) | (is_leaf ? IS_LEAF_MASK : 0)) {} }; @@ -150,6 +161,8 @@ struct predict_params { int num_output_classes; // so far, only 1 or 2 is supported, and only used to output probabilities // from classifier models + // TODO doc + leaf_value_t leaf_payload_type; // Data parameters. 
float* preds; diff --git a/cpp/src/fil/fil.cu b/cpp/src/fil/fil.cu index ee8b6b5b46..2d185cc56c 100644 --- a/cpp/src/fil/fil.cu +++ b/cpp/src/fil/fil.cu @@ -360,11 +360,26 @@ void adjust_threshold(float* pthreshold, int* tl_left, int* tl_right, } } +void tl2fil_leaf_payload(base_node_t* node, const tl::Tree::Node& node, + leaf_value_t leaf_payload_type) { + switch (leaf_payload_type) { + case INT_CLASS_LABEL: + node.val.idx = node.leaf_value(); + break; + case FLOAT_SCALAR: + node.val.f = node.leaf_value(); + break; + default: + ASSERT(false, "vector-payload nodes not supported yet"); + }; +} + void node2fil_dense(std::vector* pnodes, int root, int cur, - const tl::Tree& tree, const tl::Tree::Node& node) { + const tl::Tree& tree, const tl::Tree::Node& node, + const treelite_params_t tl_params) { if (node.is_leaf()) { - dense_node_init(&(*pnodes)[root + cur], node.leaf_value(), 0, 0, false, - true); + dense_node_init(&(*pnodes)[root + cur], nan, nan, 0, false, true); + tl2fil_leaf_payload(&(*pnodes)[root + cur], node, tl_params.leaf_payload_type); return; } @@ -383,10 +398,12 @@ void node2fil_dense(std::vector* pnodes, int root, int cur, } void node2fil_sparse(std::vector* pnodes, int root, int cur, - const tl::Tree& tree, const tl::Tree::Node& node) { + const tl::Tree& tree, const tl::Tree::Node& node, + const treelite_params_t tl_params) { if (node.is_leaf()) { - sparse_node_init(&(*pnodes)[root + cur], node.leaf_value(), 0, 0, false, + sparse_node_init(&(*pnodes)[root + cur], nan, nan, 0, false, true, 0); + tl2fil_leaf_payload(&(*pnodes)[root + cur], node, tl_params.leaf_payload_type); return; } @@ -415,14 +432,15 @@ void node2fil_sparse(std::vector* pnodes, int root, int cur, } void tree2fil_dense(std::vector* pnodes, int root, - const tl::Tree& tree) { - node2fil_dense(pnodes, root, 0, tree, tl_node_at(tree, tree_root(tree))); + const tl::Tree& tree, const treelite_params_t tl_params) { + node2fil_dense(pnodes, root, 0, tree, tl_node_at(tree, tree_root(tree)), 
tl_params); } -int tree2fil_sparse(std::vector* pnodes, const tl::Tree& tree) { +int tree2fil_sparse(std::vector* pnodes, const tl::Tree& tree, + const treelite_params_t tl_params) { int root = pnodes->size(); pnodes->push_back(sparse_node_t()); - node2fil_sparse(pnodes, root, 0, tree, tl_node_at(tree, tree_root(tree))); + node2fil_sparse(pnodes, root, 0, tree, tl_node_at(tree, tree_root(tree)), tl_params); return root; } @@ -433,6 +451,7 @@ void tl2fil_common(forest_params_t* params, const tl::Model& model, // fill in forest-indendent params params->algo = tl_params->algo; params->threshold = tl_params->threshold; + params->leaf_payload_type = tl_params->leaf_payload_type; // fill in forest-dependent params params->num_cols = model.num_feature; @@ -469,7 +488,7 @@ void tl2fil_dense(std::vector* pnodes, forest_params_t* params, int num_nodes = forest_num_nodes(params->num_trees, params->depth); pnodes->resize(num_nodes, dense_node_t{0, 0}); for (int i = 0; i < model.trees.size(); ++i) { - tree2fil_dense(pnodes, i * tree_num_nodes(params->depth), model.trees[i]); + tree2fil_dense(pnodes, i * tree_num_nodes(params->depth), model.trees[i], tl_params); } } @@ -482,7 +501,7 @@ void tl2fil_sparse(std::vector* ptrees, std::vector* pnodes, // convert the nodes for (int i = 0; i < model.trees.size(); ++i) { - int root = tree2fil_sparse(pnodes, model.trees[i]); + int root = tree2fil_sparse(pnodes, model.trees[i], tl_params); ptrees->push_back(root); } params->num_nodes = pnodes->size(); @@ -526,9 +545,9 @@ void from_treelite(const cumlHandle& handle, forest_t* pforest, } } + forest_params_t params; switch (storage_type) { case storage_type_t::DENSE: { - forest_params_t params; std::vector nodes; tl2fil_dense(&nodes, &params, model_ref, tl_params); init_dense(handle, pforest, nodes.data(), &params); @@ -538,7 +557,6 @@ void from_treelite(const cumlHandle& handle, forest_t* pforest, break; } case storage_type_t::SPARSE: { - forest_params_t params; std::vector trees; std::vector nodes; 
tl2fil_sparse(&trees, &nodes, &params, model_ref, tl_params); diff --git a/cpp/src/fil/infer.cu b/cpp/src/fil/infer.cu index 4dbdeb41f8..692e114c46 100644 --- a/cpp/src/fil/infer.cu +++ b/cpp/src/fil/infer.cu @@ -23,11 +23,11 @@ namespace fil { using namespace MLCommon; // vec wraps float[N] for cub::BlockReduce -template +template struct vec { - float data[N]; - __host__ __device__ float& operator[](int i) { return data[i]; } - __host__ __device__ float operator[](int i) const { return data[i]; } + T data[N]; + __host__ __device__ T& operator[](int i) { return data[i]; } + __host__ __device__ T operator[](int i) const { return data[i]; } friend __host__ __device__ vec operator+(const vec& a, const vec& b) { vec r; @@ -37,9 +37,9 @@ struct vec { } }; -template -__device__ __forceinline__ void infer_one_tree(tree_type tree, float* sdata, - int cols, vec& out) { +template +__device__ __forceinline__ vec infer_one_tree(tree_type tree, float* sdata, + int cols) { int curr[NITEMS]; int mask = (1 << NITEMS) - 1; // all active for (int j = 0; j < NITEMS; ++j) curr[j] = 0; @@ -57,13 +57,15 @@ __device__ __forceinline__ void infer_one_tree(tree_type tree, float* sdata, curr[j] = n.left(curr[j]) + cond; } } while (mask != 0); + vec out; #pragma unroll - for (int j = 0; j < NITEMS; ++j) out[j] += tree[curr[j]].output(); + for (int j = 0; j < NITEMS; ++j) out[j] = tree[curr[j]].output(); + return out; } -template -__device__ __forceinline__ void infer_one_tree(tree_type tree, float* sdata, - int cols, vec<1>& out) { +template +__device__ __forceinline__ vec<1, TOUTPUT> infer_one_tree(tree_type tree, float* sdata, + int cols) { int curr = 0; for (;;) { auto n = tree[curr]; @@ -72,10 +74,49 @@ __device__ __forceinline__ void infer_one_tree(tree_type tree, float* sdata, bool cond = isnan(val) ? 
!n.def_left() : val >= n.thresh(); curr = n.left(curr) + cond; } - out[0] = tree[curr].output(); + return vec<1, TOUTPUT> out; + // TODO: why did the deleted line not increment but assign the value? + out[0] = tree[curr].output(); + return out; } -template +template +class AggregateTrees { + public: + __device__ __forceinline__ AggregateTrees(int num_classes, void* shared_workspaces); + template + __device__ __forceinline__ void accumulate(vec out); + __device__ __forceinline__ void finalize(float* out); +}; + +template <> class AggregateTrees { + vec acc; + public: + __device__ __forceinline__ AggregateTrees(int num_classes, void* shared_workspaces) { + ASSERT(num_classes <= 2, "wrong leaf payload for multi-class (>2) inference"); + // TODO: even if num_classes == 2, in regression, this needs to change + #pragma unroll + for (int i = 0; i < NITEMS; ++i) acc[i] = 0.0f; + } + template + __device__ __forceinline__ void accumulate(vec out) { + acc += out; + } + __device__ __forceinline__ void finalize(float* out) { + using BlockReduce = cub::BlockReduce, FIL_TPB>; + __shared__ typename BlockReduce::TempStorage tmp_storage; + acc = BlockReduce(tmp_storage).Sum(acc); + if (threadIdx.x == 0) { + for (int i = 0; i < NITEMS; ++i) { + int row = blockIdx.x * NITEMS + i; + if (row < params.num_rows) + out[row * num_classes] = acc[i]; + } + } + } +}; + +template __global__ void infer_k(storage_type forest, predict_params params) { // cache the row for all threads to reuse extern __shared__ char smem[]; @@ -90,22 +131,12 @@ __global__ void infer_k(storage_type forest, predict_params params) { } __syncthreads(); + AggregateTrees acc; // one block works on NITEMS rows and the whole forest - vec out; - for (int i = 0; i < NITEMS; ++i) out[i] = 0.0f; for (int j = threadIdx.x; j < forest.num_trees(); j += blockDim.x) { - infer_one_tree(forest[j], sdata, params.num_cols, out); - } - using BlockReduce = cub::BlockReduce, FIL_TPB>; - __shared__ typename BlockReduce::TempStorage 
tmp_storage; - out = BlockReduce(tmp_storage).Sum(out); - if (threadIdx.x == 0) { - for (int i = 0; i < NITEMS; ++i) { - int row = blockIdx.x * NITEMS + i; - if (row < params.num_rows) - params.preds[row * params.num_output_classes] = out[i]; - } + acc.accumulate(infer_one_tree(forest[j], sdata, params.num_cols)); } + acc.finalize(params.preds); } template @@ -124,16 +155,40 @@ void infer(storage_type forest, predict_params params, cudaStream_t stream) { int shm_sz = num_items * sizeof(float) * params.num_cols; switch (num_items) { case 1: - infer_k<1><<>>(forest, params); + switch (leaf_payload) { + case FLOAT_SCALAR: + infer_k<1, FLOAT_SCALAR, float><<>>(forest, params); + break; + default: + ASSERT(false, "only FLOAT_SCALAR supported as leaf_payload so far"); + } break; case 2: - infer_k<2><<>>(forest, params); + switch (leaf_payload) { + case FLOAT_SCALAR: + infer_k<2, FLOAT_SCALAR, float><<>>(forest, params); + break; + default: + ASSERT(false, "only FLOAT_SCALAR supported as leaf_payload so far"); + } break; case 3: - infer_k<3><<>>(forest, params); + switch (leaf_payload) { + case FLOAT_SCALAR: + infer_k<3, FLOAT_SCALAR, float><<>>(forest, params); + break; + default: + ASSERT(false, "only FLOAT_SCALAR supported as leaf_payload so far"); + } break; case 4: - infer_k<4><<>>(forest, params); + switch (leaf_payload) { + case FLOAT_SCALAR: + infer_k<4, FLOAT_SCALAR, float><<>>(forest, params); + break; + default: + ASSERT(false, "only FLOAT_SCALAR supported as leaf_payload so far"); + } break; default: ASSERT(false, "internal error: nitems > 4"); diff --git a/python/cuml/fil/fil.pyx b/python/cuml/fil/fil.pyx index a488a25024..ab55827919 100644 --- a/python/cuml/fil/fil.pyx +++ b/python/cuml/fil/fil.pyx @@ -283,6 +283,7 @@ cdef class ForestInference_impl(): treelite_params.threshold = threshold treelite_params.algo = self.get_algo(algo) treelite_params.storage_type = self.get_storage_type(storage_type) + treelite_params.leaf_payload = leaf_value_t.FLOAT_SCALAR 
self.forest_data = NULL cdef cumlHandle* handle_ =\ From 7a8c774f680bb97b0998bc6fe4eb1518d07c9b2b Mon Sep 17 00:00:00 2001 From: Levs Dolgovs Date: Thu, 20 Feb 2020 20:16:10 -0800 Subject: [PATCH 026/330] compile bug fixes --- cpp/src/fil/common.cuh | 24 ++++++++++-------- cpp/src/fil/infer.cu | 57 ++++++++++++++++++++++-------------------- 2 files changed, 43 insertions(+), 38 deletions(-) diff --git a/cpp/src/fil/common.cuh b/cpp/src/fil/common.cuh index 84fd2e036a..8f27af6316 100644 --- a/cpp/src/fil/common.cuh +++ b/cpp/src/fil/common.cuh @@ -46,7 +46,7 @@ struct base_node { static const int FID_MASK = (1 << 30) - 1; static const int DEF_LEFT_MASK = 1 << 30; static const int IS_LEAF_MASK = 1 << 31; - union { + union Val { /// threshold value for branch node or output value (e.g. class /// probability or regression summand) for leaf node float f; @@ -55,23 +55,25 @@ struct base_node { ///< vector can be used for class probabilities or regression } val; int bits; - template __host__ __device__ Tval output() const; - template<> __host__ __device__ - float output() const { return val.f; } - template<> __host__ __device__ - unsigned int output() const { return val.idx; } + template __host__ __device__ T output() const; __host__ __device__ float thresh() const { return val.f; } __host__ __device__ int fid() const { return bits & FID_MASK; } __host__ __device__ bool def_left() const { return bits & DEF_LEFT_MASK; } __host__ __device__ bool is_leaf() const { return bits & IS_LEAF_MASK; } - __host__ __device__ base_node() : val.f(0.0f), bits(0) {} - base_node(dense_node_t node) : val.f(node.val), bits(node.bits) {} + __host__ __device__ base_node() : bits(0) { val.f = 0.0f; } + base_node(dense_node_t node) : bits(node.bits) { val.f = node.val; } base_node(float output, float thresh, int fid, bool def_left, bool is_leaf) - : val.f(is_leaf ? output : thresh), - bits((fid & FID_MASK) | (def_left ? DEF_LEFT_MASK : 0) | - (is_leaf ? 
IS_LEAF_MASK : 0)) {} + : bits((fid & FID_MASK) | (def_left ? DEF_LEFT_MASK : 0) | + (is_leaf ? IS_LEAF_MASK : 0)) + { val.f = is_leaf ? output : thresh; } }; +template<> __host__ __device__ +unsigned int base_node::output() const { return val.idx; } + +template<> __host__ __device__ + float base_node::output() const { return val.f; } + /** dense_node is a single node of a dense forest */ struct alignas(8) dense_node : base_node { __host__ __device__ dense_node() : base_node() {} diff --git a/cpp/src/fil/infer.cu b/cpp/src/fil/infer.cu index 692e114c46..23e0e47029 100644 --- a/cpp/src/fil/infer.cu +++ b/cpp/src/fil/infer.cu @@ -28,9 +28,9 @@ struct vec { T data[N]; __host__ __device__ T& operator[](int i) { return data[i]; } __host__ __device__ T operator[](int i) const { return data[i]; } - friend __host__ __device__ vec operator+(const vec& a, - const vec& b) { - vec r; + friend __host__ __device__ vec operator+(const vec& a, + const vec& b) { + vec r; #pragma unroll for (int i = 0; i < N; ++i) r[i] = a[i] + b[i]; return r; @@ -80,43 +80,42 @@ __device__ __forceinline__ vec<1, TOUTPUT> infer_one_tree(tree_type tree, float* return out; } -template +template class AggregateTrees { public: - __device__ __forceinline__ AggregateTrees(int num_classes, void* shared_workspaces); - template + __device__ __forceinline__ AggregateTrees(int num_output_classes, void* smem_workspace); __device__ __forceinline__ void accumulate(vec out); - __device__ __forceinline__ void finalize(float* out); + __device__ __forceinline__ void finalize(float* out, int num_rows); }; -template <> class AggregateTrees { +template class AggregateTrees { vec acc; + int num_output_classes; public: - __device__ __forceinline__ AggregateTrees(int num_classes, void* shared_workspaces) { - ASSERT(num_classes <= 2, "wrong leaf payload for multi-class (>2) inference"); - // TODO: even if num_classes == 2, in regression, this needs to change + __device__ __forceinline__ AggregateTrees(int 
num_output_classes_, void*): + num_output_classes(num_output_classes_) { + // TODO: even if num_output_classes == 2, in regression, this needs to change #pragma unroll for (int i = 0; i < NITEMS; ++i) acc[i] = 0.0f; } - template __device__ __forceinline__ void accumulate(vec out) { acc += out; } - __device__ __forceinline__ void finalize(float* out) { - using BlockReduce = cub::BlockReduce, FIL_TPB>; + __device__ __forceinline__ void finalize(float* out, int num_rows) { + using BlockReduce = cub::BlockReduce, FIL_TPB>; __shared__ typename BlockReduce::TempStorage tmp_storage; acc = BlockReduce(tmp_storage).Sum(acc); if (threadIdx.x == 0) { for (int i = 0; i < NITEMS; ++i) { int row = blockIdx.x * NITEMS + i; - if (row < params.num_rows) - out[row * num_classes] = acc[i]; + if (row < num_rows) + out[row * num_output_classes] = acc[i]; } } } }; -template +template __global__ void infer_k(storage_type forest, predict_params params) { // cache the row for all threads to reuse extern __shared__ char smem[]; @@ -131,12 +130,12 @@ __global__ void infer_k(storage_type forest, predict_params params) { } __syncthreads(); - AggregateTrees acc; + AggregateTrees acc(params.num_output_classes, nullptr); // one block works on NITEMS rows and the whole forest for (int j = threadIdx.x; j < forest.num_trees(); j += blockDim.x) { acc.accumulate(infer_one_tree(forest[j], sdata, params.num_cols)); } - acc.finalize(params.preds); + acc.finalize(params.preds, params.num_rows); } template @@ -155,39 +154,43 @@ void infer(storage_type forest, predict_params params, cudaStream_t stream) { int shm_sz = num_items * sizeof(float) * params.num_cols; switch (num_items) { case 1: - switch (leaf_payload) { + switch (params.leaf_payload_type) { case FLOAT_SCALAR: + ASSERT(params.num_output_classes <= 2, "wrong leaf payload for multi-class (>2) inference"); infer_k<1, FLOAT_SCALAR, float><<>>(forest, params); break; default: - ASSERT(false, "only FLOAT_SCALAR supported as leaf_payload so far"); + 
ASSERT(false, "only FLOAT_SCALAR supported as leaf_payload_type so far"); } break; case 2: - switch (leaf_payload) { + switch (params.leaf_payload_type) { case FLOAT_SCALAR: + ASSERT(params.num_output_classes <= 2, "wrong leaf payload for multi-class (>2) inference"); infer_k<2, FLOAT_SCALAR, float><<>>(forest, params); break; default: - ASSERT(false, "only FLOAT_SCALAR supported as leaf_payload so far"); + ASSERT(false, "only FLOAT_SCALAR supported as leaf_payload_type so far"); } break; case 3: - switch (leaf_payload) { + switch (params.leaf_payload_type) { case FLOAT_SCALAR: + ASSERT(params.num_output_classes <= 2, "wrong leaf payload for multi-class (>2) inference"); infer_k<3, FLOAT_SCALAR, float><<>>(forest, params); break; default: - ASSERT(false, "only FLOAT_SCALAR supported as leaf_payload so far"); + ASSERT(false, "only FLOAT_SCALAR supported as leaf_payload_type so far"); } break; case 4: - switch (leaf_payload) { + switch (params.leaf_payload_type) { case FLOAT_SCALAR: + ASSERT(params.num_output_classes <= 2, "wrong leaf payload for multi-class (>2) inference"); infer_k<4, FLOAT_SCALAR, float><<>>(forest, params); break; default: - ASSERT(false, "only FLOAT_SCALAR supported as leaf_payload so far"); + ASSERT(false, "only FLOAT_SCALAR supported as leaf_payload_type so far"); } break; default: From 3ec4807c3b46adbb8ed89a3063fa43a7d4e445e1 Mon Sep 17 00:00:00 2001 From: Levs Dolgovs Date: Fri, 21 Feb 2020 17:52:41 -0800 Subject: [PATCH 027/330] fixed dense/sparse node init logic, compilation issues, formatting --- cpp/include/cuml/fil/fil.h | 38 ++++++++++++--------- cpp/src/fil/common.cuh | 52 +++++++++++++---------------- cpp/src/fil/fil.cu | 67 ++++++++++++++++++++++++-------------- cpp/src/fil/infer.cu | 26 ++++++++------- cpp/test/sg/fil_test.cu | 7 ++-- 5 files changed, 105 insertions(+), 85 deletions(-) diff --git a/cpp/include/cuml/fil/fil.h b/cpp/include/cuml/fil/fil.h index d1ab3601f9..ec734c4d73 100644 --- a/cpp/include/cuml/fil/fil.h +++ 
b/cpp/include/cuml/fil/fil.h @@ -85,27 +85,39 @@ enum storage_type_t { /** dense_node_t is a node in a densely-stored forest */ struct dense_node_t { - float val; + union Val { + /// threshold value for branch node or output value (e.g. class + /// probability or regression summand) for leaf node + float f; + unsigned int idx; + ///< class label or index of the float vector + ///< vector can be used for class probabilities or regression + } val; int bits; }; -/** sparse_node_t is a node in a sparsely-stored forest */ -struct sparse_node_t { - float val; - int bits; +struct sparse_node_extra_data { int left_idx; - // pad the size to 16 bytes to match sparse_node - // (in cpp/src/fil/common.cuh) - int dummy; + int dummy; // make alignment explicit and reserve for future use +}; + +/** sparse_node_t is a node in a sparsely-stored forest */ +struct sparse_node_t : dense_node_t, sparse_node_extra_data { + sparse_node_t() = default; + sparse_node_t(dense_node_t dn, sparse_node_extra_data ed) + : dense_node_t(dn), sparse_node_extra_data(ed) {} }; +enum leaf_value_t { FLOAT_SCALAR, INT_CLASS_LABEL, FLOAT_VECTOR }; + /** dense_node_init initializes node from paramters */ void dense_node_init(dense_node_t* n, float output, float thresh, int fid, bool def_left, bool is_leaf); /** dense_node_decode extracts individual members from node */ void dense_node_decode(const dense_node_t* node, float* output, float* thresh, - int* fid, bool* def_left, bool* is_leaf); + int* fid, bool* def_left, bool* is_leaf, + leaf_value_t leaf_payload_type); /** sparse_node_init initializes node from parameters */ void sparse_node_init(sparse_node_t* node, float output, float thresh, int fid, @@ -114,19 +126,13 @@ void sparse_node_init(sparse_node_t* node, float output, float thresh, int fid, /** sparse_node_decode extracts individual members from node */ void sparse_node_decode(const sparse_node_t* node, float* output, float* thresh, int* fid, bool* def_left, bool* is_leaf, - int* left_index); + 
int* left_index, leaf_value_t leaf_payload_type); struct forest; /** forest_t is the predictor handle */ typedef forest* forest_t; -enum leaf_value_t { - FLOAT_SCALAR, - INT_CLASS_LABEL, - FLOAT_VECTOR -}; - /** forest_params_t are the trees to initialize the predictor */ struct forest_params_t { // total number of nodes; ignored for dense forests diff --git a/cpp/src/fil/common.cuh b/cpp/src/fil/common.cuh index 8f27af6316..f743471e9a 100644 --- a/cpp/src/fil/common.cuh +++ b/cpp/src/fil/common.cuh @@ -15,6 +15,7 @@ */ /** @file common.cuh Common GPU functionality */ +#pragma once #include #include @@ -42,44 +43,36 @@ __host__ __device__ __forceinline__ int forest_num_nodes(int num_trees, const int FIL_TPB = 256; /** base_node contains common implementation details for dense and sparse nodes */ -struct base_node { +struct base_node : dense_node_t { static const int FID_MASK = (1 << 30) - 1; static const int DEF_LEFT_MASK = 1 << 30; static const int IS_LEAF_MASK = 1 << 31; - union Val { - /// threshold value for branch node or output value (e.g. class - /// probability or regression summand) for leaf node - float f; - unsigned int idx; - ///< class label or index of the float vector - ///< vector can be used for class probabilities or regression - } val; - int bits; - template __host__ __device__ T output() const; + template inline T output() const; __host__ __device__ float thresh() const { return val.f; } __host__ __device__ int fid() const { return bits & FID_MASK; } __host__ __device__ bool def_left() const { return bits & DEF_LEFT_MASK; } __host__ __device__ bool is_leaf() const { return bits & IS_LEAF_MASK; } - __host__ __device__ base_node() : bits(0) { val.f = 0.0f; } - base_node(dense_node_t node) : bits(node.bits) { val.f = node.val; } - base_node(float output, float thresh, int fid, bool def_left, bool is_leaf) - : bits((fid & FID_MASK) | (def_left ? DEF_LEFT_MASK : 0) | - (is_leaf ? IS_LEAF_MASK : 0)) - { val.f = is_leaf ? 
output : thresh; } + base_node() = default; + base_node(dense_node_t node) : dense_node_t(node) {} + base_node(float output_, float thresh, int fid, bool def_left, bool is_leaf) { + bits = (fid & FID_MASK) | (def_left ? DEF_LEFT_MASK : 0) | + (is_leaf ? IS_LEAF_MASK : 0); + val.f = is_leaf ? output_ : thresh; + } }; template<> __host__ __device__ -unsigned int base_node::output() const { return val.idx; } +inline unsigned int base_node::output() const { return val.idx; } template<> __host__ __device__ - float base_node::output() const { return val.f; } +inline float base_node::output() const { return val.f; } /** dense_node is a single node of a dense forest */ struct alignas(8) dense_node : base_node { - __host__ __device__ dense_node() : base_node() {} + dense_node() = default; dense_node(dense_node_t node) : base_node(node) {} - dense_node(float output, float thresh, int fid, bool def_left, bool is_leaf) - : base_node(output, thresh, fid, def_left, is_leaf) {} + dense_node(float output_, float thresh, int fid, bool def_left, bool is_leaf) + : base_node(output_, thresh, fid, def_left, is_leaf) {} /** index of the left child, where curr is the index of the current node */ __host__ __device__ int left(int curr) const { return 2 * curr + 1; } }; @@ -116,16 +109,15 @@ struct dense_storage { }; /** sparse_node is a single node in a sparse forest */ -struct alignas(16) sparse_node : base_node { - int left_idx; - // pad the size to 16 bytes to match sparse_node_t (in fil.h) - int dummy; - __host__ __device__ sparse_node() : left_idx(0), base_node() {} +struct alignas(16) sparse_node : base_node, sparse_node_extra_data { + //__host__ __device__ sparse_node() : left_idx(0), base_node() {} sparse_node(sparse_node_t node) - : base_node(dense_node_t{node.val, node.bits}), left_idx(node.left_idx) {} - sparse_node(float output, float thresh, int fid, bool def_left, bool is_leaf, + : base_node(node), sparse_node_extra_data(node) {} + sparse_node(float output_, float thresh, int 
fid, bool def_left, bool is_leaf, int left_index) - : base_node(output, thresh, fid, def_left, is_leaf), left_idx(left_index) {} + : base_node(output_, thresh, fid, def_left, is_leaf), + sparse_node_extra_data({.left_idx = left_index, .dummy = 0}) + {} __host__ __device__ int left_index() const { return left_idx; } /** index of the left child, where curr is the index of the current node */ __host__ __device__ int left(int curr) const { return left_idx; } diff --git a/cpp/src/fil/fil.cu b/cpp/src/fil/fil.cu index 2d185cc56c..88812fab0d 100644 --- a/cpp/src/fil/fil.cu +++ b/cpp/src/fil/fil.cu @@ -38,15 +38,23 @@ namespace tl = treelite; void dense_node_init(dense_node_t* n, float output, float thresh, int fid, bool def_left, bool is_leaf) { - dense_node dn(output, thresh, fid, def_left, is_leaf); - n->bits = dn.bits; - n->val = dn.val; + *n = dense_node(output, thresh, fid, def_left, is_leaf); } void dense_node_decode(const dense_node_t* n, float* output, float* thresh, - int* fid, bool* def_left, bool* is_leaf) { + int* fid, bool* def_left, bool* is_leaf, leaf_value_t leaf_payload_type) { dense_node dn(*n); - *output = dn.output(); + // TODO: shouldn't it output a NAN in case the value is not applicable (e.g. leaf vs not a leaf)? 
+ switch (leaf_payload_type) { + case INT_CLASS_LABEL: + *output = dn.output(); + break; + case FLOAT_SCALAR: + *output = dn.output(); + break; + default: + ASSERT(false, "vector-valued payload not supported yet"); + } *thresh = dn.thresh(); *fid = dn.fid(); *def_left = dn.def_left(); @@ -56,17 +64,25 @@ void dense_node_decode(const dense_node_t* n, float* output, float* thresh, void sparse_node_init(sparse_node_t* node, float output, float thresh, int fid, bool def_left, bool is_leaf, int left_index) { sparse_node n(output, thresh, fid, def_left, is_leaf, left_index); - node->bits = n.bits; - node->val = n.val; - node->left_idx = n.left_idx; + + *node = sparse_node_t(n, n); } /** sparse_node_decode extracts individual members from node */ void sparse_node_decode(const sparse_node_t* node, float* output, float* thresh, int* fid, bool* def_left, bool* is_leaf, - int* left_index) { + int* left_index, leaf_value_t leaf_payload_type) { sparse_node n(*node); - *output = n.output(); + switch (leaf_payload_type) { + case INT_CLASS_LABEL: + *output = n.output(); + break; + case FLOAT_SCALAR: + *output = n.output(); + break; + default: + ASSERT(false, "vector-valued payload not supported yet"); + } *thresh = n.thresh(); *fid = n.fid(); *def_left = n.def_left(); @@ -360,14 +376,15 @@ void adjust_threshold(float* pthreshold, int* tl_left, int* tl_right, } } -void tl2fil_leaf_payload(base_node_t* node, const tl::Tree::Node& node, +template +void tl2fil_leaf_payload(fil_node_t* fil_node, const tl::Tree::Node& tl_node, leaf_value_t leaf_payload_type) { switch (leaf_payload_type) { case INT_CLASS_LABEL: - node.val.idx = node.leaf_value(); + fil_node->val.idx = tl_node.leaf_value(); break; case FLOAT_SCALAR: - node.val.f = node.leaf_value(); + fil_node->val.f = tl_node.leaf_value(); break; default: ASSERT(false, "vector-payload nodes not supported yet"); @@ -376,10 +393,10 @@ void tl2fil_leaf_payload(base_node_t* node, const tl::Tree::Node& node, void node2fil_dense(std::vector* 
pnodes, int root, int cur, const tl::Tree& tree, const tl::Tree::Node& node, - const treelite_params_t tl_params) { + const treelite_params_t* tl_params) { if (node.is_leaf()) { - dense_node_init(&(*pnodes)[root + cur], nan, nan, 0, false, true); - tl2fil_leaf_payload(&(*pnodes)[root + cur], node, tl_params.leaf_payload_type); + dense_node_init(&(*pnodes)[root + cur], NAN, NAN, 0, false, true); + tl2fil_leaf_payload(&(*pnodes)[root + cur], node, tl_params->leaf_payload_type); return; } @@ -393,17 +410,17 @@ void node2fil_dense(std::vector* pnodes, int root, int cur, dense_node_init(&(*pnodes)[root + cur], 0, threshold, node.split_index(), default_left, false); int left = 2 * cur + 1; - node2fil_dense(pnodes, root, left, tree, tl_node_at(tree, tl_left)); - node2fil_dense(pnodes, root, left + 1, tree, tl_node_at(tree, tl_right)); + node2fil_dense(pnodes, root, left, tree, tl_node_at(tree, tl_left), tl_params); + node2fil_dense(pnodes, root, left + 1, tree, tl_node_at(tree, tl_right), tl_params); } void node2fil_sparse(std::vector* pnodes, int root, int cur, const tl::Tree& tree, const tl::Tree::Node& node, - const treelite_params_t tl_params) { + const treelite_params_t* tl_params) { if (node.is_leaf()) { - sparse_node_init(&(*pnodes)[root + cur], nan, nan, 0, false, + sparse_node_init(&(*pnodes)[root + cur], NAN, NAN, 0, false, true, 0); - tl2fil_leaf_payload(&(*pnodes)[root + cur], node, tl_params.leaf_payload_type); + tl2fil_leaf_payload(&(*pnodes)[root + cur], node, tl_params->leaf_payload_type); return; } @@ -427,17 +444,17 @@ void node2fil_sparse(std::vector* pnodes, int root, int cur, default_left, false, left); // init child nodes - node2fil_sparse(pnodes, root, left, tree, tl_node_at(tree, tl_left)); - node2fil_sparse(pnodes, root, left + 1, tree, tl_node_at(tree, tl_right)); + node2fil_sparse(pnodes, root, left, tree, tl_node_at(tree, tl_left), tl_params); + node2fil_sparse(pnodes, root, left + 1, tree, tl_node_at(tree, tl_right), tl_params); } void 
tree2fil_dense(std::vector* pnodes, int root, - const tl::Tree& tree, const treelite_params_t tl_params) { + const tl::Tree& tree, const treelite_params_t* tl_params) { node2fil_dense(pnodes, root, 0, tree, tl_node_at(tree, tree_root(tree)), tl_params); } int tree2fil_sparse(std::vector* pnodes, const tl::Tree& tree, - const treelite_params_t tl_params) { + const treelite_params_t* tl_params) { int root = pnodes->size(); pnodes->push_back(sparse_node_t()); node2fil_sparse(pnodes, root, 0, tree, tl_node_at(tree, tree_root(tree)), tl_params); diff --git a/cpp/src/fil/infer.cu b/cpp/src/fil/infer.cu index 23e0e47029..85e9be8cd0 100644 --- a/cpp/src/fil/infer.cu +++ b/cpp/src/fil/infer.cu @@ -28,16 +28,20 @@ struct vec { T data[N]; __host__ __device__ T& operator[](int i) { return data[i]; } __host__ __device__ T operator[](int i) const { return data[i]; } + friend __host__ __device__ void operator+=(vec& a, + const vec& b) { +#pragma unroll + for (int i = 0; i < N; ++i) a[i] += b[i]; + } friend __host__ __device__ vec operator+(const vec& a, const vec& b) { - vec r; -#pragma unroll - for (int i = 0; i < N; ++i) r[i] = a[i] + b[i]; + vec r = a; + r += b; return r; - } + } }; -template +template __device__ __forceinline__ vec infer_one_tree(tree_type tree, float* sdata, int cols) { int curr[NITEMS]; @@ -59,11 +63,12 @@ __device__ __forceinline__ vec infer_one_tree(tree_type tree, f } while (mask != 0); vec out; #pragma unroll - for (int j = 0; j < NITEMS; ++j) out[j] = tree[curr[j]].output(); + for (int j = 0; j < NITEMS; ++j) + out[j] = tree[curr[j]].base_node::output(); return out; } -template +template __device__ __forceinline__ vec<1, TOUTPUT> infer_one_tree(tree_type tree, float* sdata, int cols) { int curr = 0; @@ -74,9 +79,8 @@ __device__ __forceinline__ vec<1, TOUTPUT> infer_one_tree(tree_type tree, float* bool cond = isnan(val) ? 
!n.def_left() : val >= n.thresh(); curr = n.left(curr) + cond; } - return vec<1, TOUTPUT> out; - // TODO: why did the deleted line not increment but assign the value? - out[0] = tree[curr].output(); + vec<1, TOUTPUT> out; + out[0] = tree[curr].base_node::output(); return out; } @@ -133,7 +137,7 @@ __global__ void infer_k(storage_type forest, predict_params params) { AggregateTrees acc(params.num_output_classes, nullptr); // one block works on NITEMS rows and the whole forest for (int j = threadIdx.x; j < forest.num_trees(); j += blockDim.x) { - acc.accumulate(infer_one_tree(forest[j], sdata, params.num_cols)); + acc.accumulate(infer_one_tree(forest[j], sdata, params.num_cols)); } acc.finalize(params.preds, params.num_rows); } diff --git a/cpp/test/sg/fil_test.cu b/cpp/test/sg/fil_test.cu index 56a550bd7d..89f7972c5a 100644 --- a/cpp/test/sg/fil_test.cu +++ b/cpp/test/sg/fil_test.cu @@ -251,7 +251,7 @@ class BaseFilTest : public testing::TestWithParam { bool def_left = false, is_leaf = false; for (;;) { fil::dense_node_decode(&root[curr], &output, &threshold, &fid, &def_left, - &is_leaf); + &is_leaf, fil::leaf_value_t::FLOAT_SCALAR); if (is_leaf) break; float val = data[fid]; bool cond = isnan(val) ? 
!def_left : val >= threshold; @@ -307,7 +307,7 @@ class PredictSparseFilTest : public BaseFilTest { int feature; bool def_left, is_leaf; dense_node_decode(&dense_root[i_dense], &output, &threshold, &feature, - &def_left, &is_leaf); + &def_left, &is_leaf, fil::leaf_value_t::FLOAT_SCALAR); if (is_leaf) { // leaf sparse node sparse_node_init(&sparse_nodes[i_sparse], output, threshold, feature, @@ -370,7 +370,8 @@ class TreeliteFilTest : public BaseFilTest { float threshold, output; bool is_leaf, default_left; fil::dense_node_decode(&nodes[node], &output, &threshold, &feature, - &default_left, &is_leaf); + &default_left, &is_leaf, + fil::leaf_value_t::FLOAT_SCALAR); if (is_leaf) { TL_CPP_CHECK(builder->SetLeafNode(key, output)); } else { From 0a17efd6713d9992e68466f8462bd58ded46ba62 Mon Sep 17 00:00:00 2001 From: Levs Dolgovs Date: Fri, 21 Feb 2020 18:02:34 -0800 Subject: [PATCH 028/330] fixed enum import, improved C++ docs --- cpp/include/cuml/fil/fil.h | 18 +++++++++++++----- python/cuml/fil/fil.pyx | 5 +++++ 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/cpp/include/cuml/fil/fil.h b/cpp/include/cuml/fil/fil.h index ec734c4d73..6e3c96e213 100644 --- a/cpp/include/cuml/fil/fil.h +++ b/cpp/include/cuml/fil/fil.h @@ -86,12 +86,12 @@ enum storage_type_t { /** dense_node_t is a node in a densely-stored forest */ struct dense_node_t { union Val { - /// threshold value for branch node or output value (e.g. class - /// probability or regression summand) for leaf node + /** threshold value for branch node or output value (e.g. 
class + probability or regression summand) for leaf node */ float f; + /** class label or index of the float vector + vector can be used for class probabilities or regression */ unsigned int idx; - ///< class label or index of the float vector - ///< vector can be used for class probabilities or regression } val; int bits; }; @@ -108,7 +108,15 @@ struct sparse_node_t : dense_node_t, sparse_node_extra_data { : dense_node_t(dn), sparse_node_extra_data(ed) {} }; -enum leaf_value_t { FLOAT_SCALAR, INT_CLASS_LABEL, FLOAT_VECTOR }; +enum leaf_value_t { + /** storing a clas probability or regression summand */ + FLOAT_SCALAR, + /** storing a class label */ + INT_CLASS_LABEL, + /** storing an index of the float vector which contains + class probabilities or regression summands */ + FLOAT_VECTOR +}; /** dense_node_init initializes node from paramters */ void dense_node_init(dense_node_t* n, float output, float thresh, int fid, diff --git a/python/cuml/fil/fil.pyx b/python/cuml/fil/fil.pyx index ab55827919..482a467c95 100644 --- a/python/cuml/fil/fil.pyx +++ b/python/cuml/fil/fil.pyx @@ -165,6 +165,11 @@ cdef extern from "cuml/fil/fil.h" namespace "ML::fil": DENSE, SPARSE + cdef enum leaf_value_t: + FLOAT_SCALAR, + INT_CLASS_LABEL, + FLOAT_VECTOR + cdef struct forest: pass From 4f7823c4f4b18ffd82d227829ae97196d3e27ffd Mon Sep 17 00:00:00 2001 From: Levs Dolgovs Date: Thu, 20 Feb 2020 17:45:52 -0800 Subject: [PATCH 029/330] threaded proper payload type handling to try out different algorithms --- cpp/src/fil/infer.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/fil/infer.cu b/cpp/src/fil/infer.cu index 85e9be8cd0..fb7a763fc8 100644 --- a/cpp/src/fil/infer.cu +++ b/cpp/src/fil/infer.cu @@ -142,7 +142,7 @@ __global__ void infer_k(storage_type forest, predict_params params) { acc.finalize(params.preds, params.num_rows); } -template +template void infer(storage_type forest, predict_params params, cudaStream_t stream) { const int MAX_BATCH_ITEMS = 4; 
params.max_items = From c8a3cd04d5f095f1575fb86083f7bd561b4058bc Mon Sep 17 00:00:00 2001 From: Levs Dolgovs Date: Thu, 20 Feb 2020 18:40:22 -0800 Subject: [PATCH 030/330] added histogram vote aggregation mechanism --- cpp/src/fil/infer.cu | 44 +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 41 insertions(+), 3 deletions(-) diff --git a/cpp/src/fil/infer.cu b/cpp/src/fil/infer.cu index fb7a763fc8..01a60caeec 100644 --- a/cpp/src/fil/infer.cu +++ b/cpp/src/fil/infer.cu @@ -112,8 +112,43 @@ template class AggregateTrees { if (threadIdx.x == 0) { for (int i = 0; i < NITEMS; ++i) { int row = blockIdx.x * NITEMS + i; - if (row < num_rows) + if (row < params.num_rows) out[row * num_output_classes] = acc[i]; + //TODO for 2 output values, will need to change the above line + // to fix regression + } + } + } +}; + +template <> class AggregateTrees { + typedef unsigned int VoteCount; + // can switch to unsigned short to save shared memory + // provided atomicInc(short*) simulated with atomicAdd with appropriate shifts + VoteCount* votes; + public: + __device__ __forceinline__ AggregateTrees(int num_classes, void* shared_workspace) { + votes = (VoteCount*)shared_workspace; + for (int c = threadIdx.x; c < num_classes; c += FIL_TPB * NITEMS) + #pragma unroll + for (int i = 0; i < NITEMS; ++i) + votes[i * num_classes + c] = 0; + //__syncthreads(); // happening outside + } + template + __device__ __forceinline__ void accumulate(vec out) { + #pragma unroll + for (int i = 0; i < NITEMS; ++i) + atomicInc(votes + i * num_classes + acc[i]); + } + __device__ __forceinline__ void finalize(float* out) { + __syncthreads(); + if (threadIdx.x == 0) { + for (int i = 0; i < NITEMS; ++i) { + int row = blockIdx.x * NITEMS + i; + if (row < params.num_rows) + for (int c = 0; c < num_classes; ++c) + out[row * num_classes + c] = votes[i * num_classes + c]; } } } @@ -132,9 +167,12 @@ __global__ void infer_k(storage_type forest, predict_params params) { row < params.num_rows ? 
params.data[row * params.num_cols + i] : 0.0f; } } - __syncthreads(); + + AggregateTrees + acc(params.num_output_classes, sdata + params.num_cols * NITEMS); + + __syncthreads(); // for both row cache init and acc init - AggregateTrees acc(params.num_output_classes, nullptr); // one block works on NITEMS rows and the whole forest for (int j = threadIdx.x; j < forest.num_trees(); j += blockDim.x) { acc.accumulate(infer_one_tree(forest[j], sdata, params.num_cols)); From 3e2e8e5a94f56a150a5a48b86bf023c24abd881f Mon Sep 17 00:00:00 2001 From: Levs Dolgovs Date: Fri, 21 Feb 2020 18:33:00 -0800 Subject: [PATCH 031/330] draft for integer payload threading and abstracting reduction mechanism --- cpp/src/fil/infer.cu | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/src/fil/infer.cu b/cpp/src/fil/infer.cu index 01a60caeec..8f8b9c8e17 100644 --- a/cpp/src/fil/infer.cu +++ b/cpp/src/fil/infer.cu @@ -173,6 +173,7 @@ __global__ void infer_k(storage_type forest, predict_params params) { __syncthreads(); // for both row cache init and acc init + AggregateTrees acc(params.num_output_classes, nullptr); // one block works on NITEMS rows and the whole forest for (int j = threadIdx.x; j < forest.num_trees(); j += blockDim.x) { acc.accumulate(infer_one_tree(forest[j], sdata, params.num_cols)); From ba9c382f960a2dbe40fb553357b367442eb94dd7 Mon Sep 17 00:00:00 2001 From: Levs Dolgovs Date: Tue, 25 Feb 2020 16:39:25 -0800 Subject: [PATCH 032/330] fix compilation issues, formatting --- cpp/include/cuml/fil/fil.h | 8 +- cpp/src/fil/common.cuh | 23 +++-- cpp/src/fil/fil.cu | 61 ++++++----- cpp/src/fil/infer.cu | 203 +++++++++++++++++++------------------ python/cuml/fil/fil.pyx | 3 +- 5 files changed, 157 insertions(+), 141 deletions(-) diff --git a/cpp/include/cuml/fil/fil.h b/cpp/include/cuml/fil/fil.h index 6e3c96e213..52014b6998 100644 --- a/cpp/include/cuml/fil/fil.h +++ b/cpp/include/cuml/fil/fil.h @@ -108,14 +108,12 @@ struct sparse_node_t : dense_node_t, sparse_node_extra_data { : 
dense_node_t(dn), sparse_node_extra_data(ed) {} }; -enum leaf_value_t { +enum leaf_value_t { /** storing a clas probability or regression summand */ FLOAT_SCALAR, /** storing a class label */ - INT_CLASS_LABEL, - /** storing an index of the float vector which contains - class probabilities or regression summands */ - FLOAT_VECTOR + INT_CLASS_LABEL + // to be extended }; /** dense_node_init initializes node from paramters */ diff --git a/cpp/src/fil/common.cuh b/cpp/src/fil/common.cuh index f743471e9a..8134194da2 100644 --- a/cpp/src/fil/common.cuh +++ b/cpp/src/fil/common.cuh @@ -47,25 +47,31 @@ struct base_node : dense_node_t { static const int FID_MASK = (1 << 30) - 1; static const int DEF_LEFT_MASK = 1 << 30; static const int IS_LEAF_MASK = 1 << 31; - template inline T output() const; + template + inline T output() const; __host__ __device__ float thresh() const { return val.f; } __host__ __device__ int fid() const { return bits & FID_MASK; } __host__ __device__ bool def_left() const { return bits & DEF_LEFT_MASK; } __host__ __device__ bool is_leaf() const { return bits & IS_LEAF_MASK; } - base_node() = default; + base_node() = default; base_node(dense_node_t node) : dense_node_t(node) {} - base_node(float output_, float thresh, int fid, bool def_left, bool is_leaf) { + base_node(float output_, float thresh, int fid, bool def_left, bool is_leaf) { bits = (fid & FID_MASK) | (def_left ? DEF_LEFT_MASK : 0) | (is_leaf ? IS_LEAF_MASK : 0); val.f = is_leaf ? 
output_ : thresh; } }; -template<> __host__ __device__ -inline unsigned int base_node::output() const { return val.idx; } +template <> +__host__ __device__ inline unsigned int base_node::output() + const { + return val.idx; +} -template<> __host__ __device__ -inline float base_node::output() const { return val.f; } +template <> +__host__ __device__ inline float base_node::output() const { + return val.f; +} /** dense_node is a single node of a dense forest */ struct alignas(8) dense_node : base_node { @@ -116,8 +122,7 @@ struct alignas(16) sparse_node : base_node, sparse_node_extra_data { sparse_node(float output_, float thresh, int fid, bool def_left, bool is_leaf, int left_index) : base_node(output_, thresh, fid, def_left, is_leaf), - sparse_node_extra_data({.left_idx = left_index, .dummy = 0}) - {} + sparse_node_extra_data({.left_idx = left_index, .dummy = 0}) {} __host__ __device__ int left_index() const { return left_idx; } /** index of the left child, where curr is the index of the current node */ __host__ __device__ int left(int curr) const { return left_idx; } diff --git a/cpp/src/fil/fil.cu b/cpp/src/fil/fil.cu index 88812fab0d..5f83ab3d3c 100644 --- a/cpp/src/fil/fil.cu +++ b/cpp/src/fil/fil.cu @@ -42,7 +42,8 @@ void dense_node_init(dense_node_t* n, float output, float thresh, int fid, } void dense_node_decode(const dense_node_t* n, float* output, float* thresh, - int* fid, bool* def_left, bool* is_leaf, leaf_value_t leaf_payload_type) { + int* fid, bool* def_left, bool* is_leaf, + leaf_value_t leaf_payload_type) { dense_node dn(*n); // TODO: shouldn't it output a NAN in case the value is not applicable (e.g. leaf vs not a leaf)? 
switch (leaf_payload_type) { @@ -53,7 +54,7 @@ void dense_node_decode(const dense_node_t* n, float* output, float* thresh, *output = dn.output(); break; default: - ASSERT(false, "vector-valued payload not supported yet"); + ASSERT(false, "unknown leaf_payload_type"); } *thresh = dn.thresh(); *fid = dn.fid(); @@ -81,7 +82,7 @@ void sparse_node_decode(const sparse_node_t* node, float* output, float* thresh, *output = n.output(); break; default: - ASSERT(false, "vector-valued payload not supported yet"); + ASSERT(false, "unknown leaf_payload_type"); } *thresh = n.thresh(); *fid = n.fid(); @@ -376,19 +377,19 @@ void adjust_threshold(float* pthreshold, int* tl_left, int* tl_right, } } -template +template void tl2fil_leaf_payload(fil_node_t* fil_node, const tl::Tree::Node& tl_node, - leaf_value_t leaf_payload_type) { - switch (leaf_payload_type) { - case INT_CLASS_LABEL: - fil_node->val.idx = tl_node.leaf_value(); - break; - case FLOAT_SCALAR: - fil_node->val.f = tl_node.leaf_value(); - break; - default: - ASSERT(false, "vector-payload nodes not supported yet"); - }; + leaf_value_t leaf_payload_type) { + switch (leaf_payload_type) { + case INT_CLASS_LABEL: + fil_node->val.idx = tl_node.leaf_value(); + break; + case FLOAT_SCALAR: + fil_node->val.f = tl_node.leaf_value(); + break; + default: + ASSERT(false, "unknown leaf_payload_type"); + }; } void node2fil_dense(std::vector* pnodes, int root, int cur, @@ -396,7 +397,8 @@ void node2fil_dense(std::vector* pnodes, int root, int cur, const treelite_params_t* tl_params) { if (node.is_leaf()) { dense_node_init(&(*pnodes)[root + cur], NAN, NAN, 0, false, true); - tl2fil_leaf_payload(&(*pnodes)[root + cur], node, tl_params->leaf_payload_type); + tl2fil_leaf_payload(&(*pnodes)[root + cur], node, + tl_params->leaf_payload_type); return; } @@ -410,17 +412,19 @@ void node2fil_dense(std::vector* pnodes, int root, int cur, dense_node_init(&(*pnodes)[root + cur], 0, threshold, node.split_index(), default_left, false); int left = 2 * cur 
+ 1; - node2fil_dense(pnodes, root, left, tree, tl_node_at(tree, tl_left), tl_params); - node2fil_dense(pnodes, root, left + 1, tree, tl_node_at(tree, tl_right), tl_params); + node2fil_dense(pnodes, root, left, tree, tl_node_at(tree, tl_left), + tl_params); + node2fil_dense(pnodes, root, left + 1, tree, tl_node_at(tree, tl_right), + tl_params); } void node2fil_sparse(std::vector* pnodes, int root, int cur, const tl::Tree& tree, const tl::Tree::Node& node, const treelite_params_t* tl_params) { if (node.is_leaf()) { - sparse_node_init(&(*pnodes)[root + cur], NAN, NAN, 0, false, - true, 0); - tl2fil_leaf_payload(&(*pnodes)[root + cur], node, tl_params->leaf_payload_type); + sparse_node_init(&(*pnodes)[root + cur], NAN, NAN, 0, false, true, 0); + tl2fil_leaf_payload(&(*pnodes)[root + cur], node, + tl_params->leaf_payload_type); return; } @@ -444,20 +448,24 @@ void node2fil_sparse(std::vector* pnodes, int root, int cur, default_left, false, left); // init child nodes - node2fil_sparse(pnodes, root, left, tree, tl_node_at(tree, tl_left), tl_params); - node2fil_sparse(pnodes, root, left + 1, tree, tl_node_at(tree, tl_right), tl_params); + node2fil_sparse(pnodes, root, left, tree, tl_node_at(tree, tl_left), + tl_params); + node2fil_sparse(pnodes, root, left + 1, tree, tl_node_at(tree, tl_right), + tl_params); } void tree2fil_dense(std::vector* pnodes, int root, const tl::Tree& tree, const treelite_params_t* tl_params) { - node2fil_dense(pnodes, root, 0, tree, tl_node_at(tree, tree_root(tree)), tl_params); + node2fil_dense(pnodes, root, 0, tree, tl_node_at(tree, tree_root(tree)), + tl_params); } int tree2fil_sparse(std::vector* pnodes, const tl::Tree& tree, const treelite_params_t* tl_params) { int root = pnodes->size(); pnodes->push_back(sparse_node_t()); - node2fil_sparse(pnodes, root, 0, tree, tl_node_at(tree, tree_root(tree)), tl_params); + node2fil_sparse(pnodes, root, 0, tree, tl_node_at(tree, tree_root(tree)), + tl_params); return root; } @@ -505,7 +513,8 @@ void 
tl2fil_dense(std::vector* pnodes, forest_params_t* params, int num_nodes = forest_num_nodes(params->num_trees, params->depth); pnodes->resize(num_nodes, dense_node_t{0, 0}); for (int i = 0; i < model.trees.size(); ++i) { - tree2fil_dense(pnodes, i * tree_num_nodes(params->depth), model.trees[i], tl_params); + tree2fil_dense(pnodes, i * tree_num_nodes(params->depth), model.trees[i], + tl_params); } } diff --git a/cpp/src/fil/infer.cu b/cpp/src/fil/infer.cu index 8f8b9c8e17..ab1c57c9cf 100644 --- a/cpp/src/fil/infer.cu +++ b/cpp/src/fil/infer.cu @@ -28,8 +28,7 @@ struct vec { T data[N]; __host__ __device__ T& operator[](int i) { return data[i]; } __host__ __device__ T operator[](int i) const { return data[i]; } - friend __host__ __device__ void operator+=(vec& a, - const vec& b) { + friend __host__ __device__ void operator+=(vec& a, const vec& b) { #pragma unroll for (int i = 0; i < N; ++i) a[i] += b[i]; } @@ -38,12 +37,13 @@ struct vec { vec r = a; r += b; return r; - } + } }; template -__device__ __forceinline__ vec infer_one_tree(tree_type tree, float* sdata, - int cols) { +__device__ __forceinline__ vec infer_one_tree(tree_type tree, + float* sdata, + int cols) { int curr[NITEMS]; int mask = (1 << NITEMS) - 1; // all active for (int j = 0; j < NITEMS; ++j) curr[j] = 0; @@ -69,8 +69,9 @@ __device__ __forceinline__ vec infer_one_tree(tree_type tree, f } template -__device__ __forceinline__ vec<1, TOUTPUT> infer_one_tree(tree_type tree, float* sdata, - int cols) { +__device__ __forceinline__ vec<1, TOUTPUT> infer_one_tree(tree_type tree, + float* sdata, + int cols) { int curr = 0; for (;;) { auto n = tree[curr]; @@ -86,75 +87,82 @@ __device__ __forceinline__ vec<1, TOUTPUT> infer_one_tree(tree_type tree, float* template class AggregateTrees { - public: - __device__ __forceinline__ AggregateTrees(int num_output_classes, void* smem_workspace); - __device__ __forceinline__ void accumulate(vec out); - __device__ __forceinline__ void finalize(float* out, int num_rows); + 
public: + __device__ __forceinline__ AggregateTrees(int num_output_classes, + void* smem_workspace); + __device__ __forceinline__ void accumulate(vec out); + __device__ __forceinline__ void finalize(float* out, int num_rows); }; -template class AggregateTrees { +template +class AggregateTrees { vec acc; int num_output_classes; - public: - __device__ __forceinline__ AggregateTrees(int num_output_classes_, void*): - num_output_classes(num_output_classes_) { - // TODO: even if num_output_classes == 2, in regression, this needs to change - #pragma unroll - for (int i = 0; i < NITEMS; ++i) acc[i] = 0.0f; - } - __device__ __forceinline__ void accumulate(vec out) { - acc += out; - } - __device__ __forceinline__ void finalize(float* out, int num_rows) { - using BlockReduce = cub::BlockReduce, FIL_TPB>; - __shared__ typename BlockReduce::TempStorage tmp_storage; - acc = BlockReduce(tmp_storage).Sum(acc); - if (threadIdx.x == 0) { - for (int i = 0; i < NITEMS; ++i) { - int row = blockIdx.x * NITEMS + i; - if (row < params.num_rows) - out[row * num_output_classes] = acc[i]; - //TODO for 2 output values, will need to change the above line - // to fix regression - } + + public: + __device__ __forceinline__ AggregateTrees(int num_output_classes_, void*) + : num_output_classes(num_output_classes_) { +// TODO: even if num_output_classes == 2, in regression, this needs to change +#pragma unroll + for (int i = 0; i < NITEMS; ++i) acc[i] = 0.0f; + } + __device__ __forceinline__ void accumulate(vec out) { + acc += out; + } + __device__ __forceinline__ void finalize(float* out, int num_rows) { + using BlockReduce = cub::BlockReduce, FIL_TPB>; + __shared__ typename BlockReduce::TempStorage tmp_storage; + acc = BlockReduce(tmp_storage).Sum(acc); + if (threadIdx.x == 0) { + for (int i = 0; i < NITEMS; ++i) { + int row = blockIdx.x * NITEMS + i; + if (row < num_rows) out[row * num_output_classes] = acc[i]; + //TODO for 2 output values, will need to change the above line + // to fix 
regression } } + } }; -template <> class AggregateTrees { +template +class AggregateTrees { typedef unsigned int VoteCount; // can switch to unsigned short to save shared memory // provided atomicInc(short*) simulated with atomicAdd with appropriate shifts VoteCount* votes; - public: - __device__ __forceinline__ AggregateTrees(int num_classes, void* shared_workspace) { - votes = (VoteCount*)shared_workspace; - for (int c = threadIdx.x; c < num_classes; c += FIL_TPB * NITEMS) - #pragma unroll - for (int i = 0; i < NITEMS; ++i) - votes[i * num_classes + c] = 0; - //__syncthreads(); // happening outside - } - template - __device__ __forceinline__ void accumulate(vec out) { - #pragma unroll - for (int i = 0; i < NITEMS; ++i) - atomicInc(votes + i * num_classes + acc[i]); - } - __device__ __forceinline__ void finalize(float* out) { - __syncthreads(); - if (threadIdx.x == 0) { - for (int i = 0; i < NITEMS; ++i) { - int row = blockIdx.x * NITEMS + i; - if (row < params.num_rows) - for (int c = 0; c < num_classes; ++c) - out[row * num_classes + c] = votes[i * num_classes + c]; - } + int num_output_classes; + + public: + __device__ __forceinline__ AggregateTrees(int num_output_classes_, + void* shared_workspace) + : num_output_classes(num_output_classes_) { + votes = (VoteCount*)shared_workspace; + for (int c = threadIdx.x; c < num_output_classes; c += FIL_TPB * NITEMS) +#pragma unroll + for (int i = 0; i < NITEMS; ++i) votes[i * num_output_classes + c] = 0; + //__syncthreads(); // happening outside + } + __device__ __forceinline__ void accumulate(vec out) { +#pragma unroll + for (int i = 0; i < NITEMS; ++i) + atomicInc(votes + i * num_output_classes + out[i], UINT_MAX); + } + __device__ __forceinline__ void finalize(float* out, int num_rows) { + __syncthreads(); + if (threadIdx.x == 0) { + for (int i = 0; i < NITEMS; ++i) { + int row = blockIdx.x * NITEMS + i; + if (row < num_rows) + for (int c = 0; c < num_output_classes; ++c) + out[row * num_output_classes + c] = + 
votes[i * num_output_classes + c]; } } + } }; -template +template __global__ void infer_k(storage_type forest, predict_params params) { // cache the row for all threads to reuse extern __shared__ char smem[]; @@ -167,22 +175,25 @@ __global__ void infer_k(storage_type forest, predict_params params) { row < params.num_rows ? params.data[row * params.num_cols + i] : 0.0f; } } - - AggregateTrees - acc(params.num_output_classes, sdata + params.num_cols * NITEMS); - __syncthreads(); // for both row cache init and acc init + AggregateTrees acc( + params.num_output_classes, sdata + params.num_cols * NITEMS); + + __syncthreads(); // for both row cache init and acc init AggregateTrees acc(params.num_output_classes, nullptr); // one block works on NITEMS rows and the whole forest for (int j = threadIdx.x; j < forest.num_trees(); j += blockDim.x) { - acc.accumulate(infer_one_tree(forest[j], sdata, params.num_cols)); + acc.accumulate( + infer_one_tree(forest[j], sdata, params.num_cols)); } acc.finalize(params.preds, params.num_rows); } -template -void infer(storage_type forest, predict_params params, cudaStream_t stream) { +template +void infer_k_launcher(storage_type forest, predict_params params, + cudaStream_t stream) { const int MAX_BATCH_ITEMS = 4; params.max_items = params.algo == algo_t::BATCH_TREE_REORG ? 
MAX_BATCH_ITEMS : 1; @@ -197,44 +208,20 @@ void infer(storage_type forest, predict_params params, cudaStream_t stream) { int shm_sz = num_items * sizeof(float) * params.num_cols; switch (num_items) { case 1: - switch (params.leaf_payload_type) { - case FLOAT_SCALAR: - ASSERT(params.num_output_classes <= 2, "wrong leaf payload for multi-class (>2) inference"); - infer_k<1, FLOAT_SCALAR, float><<>>(forest, params); - break; - default: - ASSERT(false, "only FLOAT_SCALAR supported as leaf_payload_type so far"); - } + infer_k<1, leaf_payload_type, TOUTPUT> + <<>>(forest, params); break; case 2: - switch (params.leaf_payload_type) { - case FLOAT_SCALAR: - ASSERT(params.num_output_classes <= 2, "wrong leaf payload for multi-class (>2) inference"); - infer_k<2, FLOAT_SCALAR, float><<>>(forest, params); - break; - default: - ASSERT(false, "only FLOAT_SCALAR supported as leaf_payload_type so far"); - } + infer_k<2, leaf_payload_type, TOUTPUT> + <<>>(forest, params); break; case 3: - switch (params.leaf_payload_type) { - case FLOAT_SCALAR: - ASSERT(params.num_output_classes <= 2, "wrong leaf payload for multi-class (>2) inference"); - infer_k<3, FLOAT_SCALAR, float><<>>(forest, params); - break; - default: - ASSERT(false, "only FLOAT_SCALAR supported as leaf_payload_type so far"); - } + infer_k<3, leaf_payload_type, TOUTPUT> + <<>>(forest, params); break; case 4: - switch (params.leaf_payload_type) { - case FLOAT_SCALAR: - ASSERT(params.num_output_classes <= 2, "wrong leaf payload for multi-class (>2) inference"); - infer_k<4, FLOAT_SCALAR, float><<>>(forest, params); - break; - default: - ASSERT(false, "only FLOAT_SCALAR supported as leaf_payload_type so far"); - } + infer_k<4, leaf_payload_type, TOUTPUT> + <<>>(forest, params); break; default: ASSERT(false, "internal error: nitems > 4"); @@ -242,6 +229,24 @@ void infer(storage_type forest, predict_params params, cudaStream_t stream) { CUDA_CHECK(cudaPeekAtLastError()); } +template +void infer(storage_type forest, 
predict_params params, cudaStream_t stream) { + switch (params.leaf_payload_type) { + case FLOAT_SCALAR: + ASSERT(params.num_output_classes <= 2, + "wrong leaf payload for multi-class (>2) inference"); + infer_k_launcher(forest, params, + stream); + break; + case INT_CLASS_LABEL: + infer_k_launcher( + forest, params, stream); + break; + default: + ASSERT(false, "unknown leaf_payload_type"); + } +} + template void infer(dense_storage forest, predict_params params, cudaStream_t stream); template void infer(sparse_storage forest, diff --git a/python/cuml/fil/fil.pyx b/python/cuml/fil/fil.pyx index 482a467c95..61e99a793f 100644 --- a/python/cuml/fil/fil.pyx +++ b/python/cuml/fil/fil.pyx @@ -167,8 +167,7 @@ cdef extern from "cuml/fil/fil.h" namespace "ML::fil": cdef enum leaf_value_t: FLOAT_SCALAR, - INT_CLASS_LABEL, - FLOAT_VECTOR + INT_CLASS_LABEL cdef struct forest: pass From 4d60c9b66f5a2169704309e0101ac9ccc5729c51 Mon Sep 17 00:00:00 2001 From: Levs Dolgovs Date: Tue, 25 Feb 2020 20:36:25 -0800 Subject: [PATCH 033/330] threaded leaf_payload_type through FIL C API, triggers bugs for INT_CLASS_LABEL --- cpp/src/fil/fil.cu | 13 ++++ cpp/src/fil/infer.cu | 1 + cpp/test/sg/fil_test.cu | 155 +++++++++++++++++++++------------------- python/cuml/fil/fil.pyx | 1 + 4 files changed, 96 insertions(+), 74 deletions(-) diff --git a/cpp/src/fil/fil.cu b/cpp/src/fil/fil.cu index 5f83ab3d3c..e874ac869a 100644 --- a/cpp/src/fil/fil.cu +++ b/cpp/src/fil/fil.cu @@ -140,6 +140,8 @@ struct forest { output_ = params->output; threshold_ = params->threshold; global_bias_ = params->global_bias; + leaf_payload_type_ = params->leaf_payload_type; + printf("%s line %d: leaf_payload_type %d\n", __FILE__, __LINE__, params->leaf_payload_type); init_max_shm(); } @@ -156,6 +158,7 @@ struct forest { params.num_rows = num_rows; params.max_shm = max_shm_; params.num_output_classes = predict_proba ? 2 : 1; + params.leaf_payload_type = leaf_payload_type_; // Predict using the forest. 
cudaStream_t stream = h.getStream(); @@ -181,6 +184,7 @@ struct forest { output_t output_ = output_t::RAW; float threshold_ = 0.5; float global_bias_ = 0; + leaf_value_t leaf_payload_type_ = FLOAT_SCALAR; }; struct dense_forest : forest { @@ -295,6 +299,14 @@ void check_params(const forest_params_t* params, bool dense) { ASSERT(false, "algo should be ALGO_AUTO, NAIVE, TREE_REORG or BATCH_TREE_REORG"); } + switch (params->leaf_payload_type) { + case leaf_value_t::FLOAT_SCALAR: + case leaf_value_t::INT_CLASS_LABEL: + break; + default: + ASSERT(false, + "leaf_payload_type should be FLOAT_SCALAR or INT_CLASS_LABEL"); + } // output_t::RAW == 0, and doesn't have a separate flag output_t all_set = output_t(output_t::AVG | output_t::SIGMOID | output_t::THRESHOLD); @@ -551,6 +563,7 @@ void init_sparse(const cumlHandle& h, forest_t* pf, const int* trees, void from_treelite(const cumlHandle& handle, forest_t* pforest, ModelHandle model, const treelite_params_t* tl_params) { + printf("%s line %d: leaf_payload_type %d\n", __FILE__, __LINE__, tl_params->leaf_payload_type); storage_type_t storage_type = tl_params->storage_type; // build dense trees by default const tl::Model& model_ref = *(tl::Model*)model; diff --git a/cpp/src/fil/infer.cu b/cpp/src/fil/infer.cu index ab1c57c9cf..56acf25b11 100644 --- a/cpp/src/fil/infer.cu +++ b/cpp/src/fil/infer.cu @@ -243,6 +243,7 @@ void infer(storage_type forest, predict_params params, cudaStream_t stream) { forest, params, stream); break; default: + printf("%s line %d: leaf_payload_type %d\n", __FILE__, __LINE__, params.leaf_payload_type); ASSERT(false, "unknown leaf_payload_type"); } } diff --git a/cpp/test/sg/fil_test.cu b/cpp/test/sg/fil_test.cu index 89f7972c5a..3d9162380a 100644 --- a/cpp/test/sg/fil_test.cu +++ b/cpp/test/sg/fil_test.cu @@ -57,6 +57,7 @@ struct FilTestParams { float tolerance; // treelite parameters, only used for treelite tests tl::Operator op; + fil::leaf_value_t leaf_payload_type; }; std::ostream& 
operator<<(std::ostream& os, const FilTestParams& ps) { @@ -65,7 +66,8 @@ std::ostream& operator<<(std::ostream& os, const FilTestParams& ps) { << ", num_trees = " << ps.num_trees << ", leaf_prob = " << ps.leaf_prob << ", output = " << ps.output << ", threshold = " << ps.threshold << ", algo = " << ps.algo << ", seed = " << ps.seed - << ", tolerance = " << ps.tolerance << ", op = " << tl::OpName(ps.op); + << ", tolerance = " << ps.tolerance << ", op = " << tl::OpName(ps.op) + << ", leaf_payload_type = " << ps.leaf_payload_type; return os; } @@ -295,6 +297,8 @@ class PredictDenseFilTest : public BaseFilTest { fil_ps.output = ps.output; fil_ps.threshold = ps.threshold; fil_ps.global_bias = ps.global_bias; + fil_ps.leaf_payload_type = ps.leaf_payload_type; + printf("%s line %d: leaf_payload_type %d\n", __FILE__, __LINE__, fil_ps.leaf_payload_type); fil::init_dense(handle, pforest, nodes.data(), &fil_ps); } }; @@ -348,6 +352,7 @@ class PredictSparseFilTest : public BaseFilTest { fil_params.output = ps.output; fil_params.threshold = ps.threshold; fil_params.global_bias = ps.global_bias; + fil_params.leaf_payload_type = ps.leaf_payload_type; dense2sparse(); fil_params.num_nodes = sparse_nodes.size(); fil::init_sparse(handle, pforest, trees.data(), sparse_nodes.data(), @@ -444,6 +449,8 @@ class TreeliteFilTest : public BaseFilTest { params.threshold = ps.threshold; params.output_class = (ps.output & fil::output_t::THRESHOLD) != 0; params.storage_type = storage_type; + params.leaf_payload_type = ps.leaf_payload_type; + printf("%s line %d: leaf_payload_type %d\n", __FILE__, __LINE__, params.leaf_payload_type); fil::from_treelite(handle, pforest, (ModelHandle)model.get(), ¶ms); CUDA_CHECK(cudaStreamSynchronize(stream)); } @@ -474,52 +481,52 @@ class TreeliteAutoFilTest : public TreeliteFilTest { // global_bias, algo, seed, tolerance std::vector predict_dense_inputs = { {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0, fil::algo_t::NAIVE, - 42, 2e-3f}, + 42, 2e-3f, 
tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0, - fil::algo_t::TREE_REORG, 42, 2e-3f}, + fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f}, + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, - fil::algo_t::NAIVE, 42, 2e-3f}, + fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, - fil::algo_t::TREE_REORG, 42, 2e-3f}, + fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f}, + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::SIGMOID | fil::output_t::THRESHOLD), 0, 0, - fil::algo_t::NAIVE, 42, 2e-3f}, + fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::SIGMOID | fil::output_t::THRESHOLD), 0, 0, - fil::algo_t::TREE_REORG, 42, 2e-3f}, + fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::SIGMOID | fil::output_t::THRESHOLD), 0, 0, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f}, + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::NAIVE, - 42, 2e-3f}, + 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, - fil::algo_t::TREE_REORG, 42, 2e-3f}, + fil::algo_t::TREE_REORG, 42, 
2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f}, + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::THRESHOLD), 0, 0, - fil::algo_t::NAIVE, 42, 2e-3f}, + fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::THRESHOLD), 0, 0, - fil::algo_t::TREE_REORG, 42, 2e-3f}, + fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::THRESHOLD), 0, 0, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f}, + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0.5, - fil::algo_t::TREE_REORG, 42, 2e-3f}, + fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0.5, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f}, + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0.5, fil::algo_t::NAIVE, - 42, 2e-3f}, + 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::THRESHOLD), 1.0, 0.5, - fil::algo_t::TREE_REORG, 42, 2e-3f}, + fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, - fil::algo_t::ALGO_AUTO, 42, 2e-3f}, + fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR}, }; TEST_P(PredictDenseFilTest, Predict) { compare(); } @@ -531,29 +538,29 @@ 
INSTANTIATE_TEST_CASE_P(FilTests, PredictDenseFilTest, // global_bias, algo, seed, tolerance std::vector predict_sparse_inputs = { {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0, fil::algo_t::NAIVE, - 42, 2e-3f}, + 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, - fil::algo_t::NAIVE, 42, 2e-3f}, + fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::SIGMOID | fil::output_t::THRESHOLD), 0, 0, - fil::algo_t::NAIVE, 42, 2e-3f}, + fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::NAIVE, - 42, 2e-3f}, + 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::THRESHOLD), 0, 0.5, - fil::algo_t::NAIVE, 42, 2e-3f}, + fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0.5, fil::algo_t::NAIVE, - 42, 2e-3f}, + 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0.5, - fil::algo_t::NAIVE, 42, 2e-3f}, + fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0.5, fil::algo_t::NAIVE, - 42, 2e-3f}, + 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::THRESHOLD), 1.0, 0.5, - fil::algo_t::NAIVE, 42, 2e-3f}, + fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::SIGMOID | fil::output_t::THRESHOLD), 0, 0, - fil::algo_t::ALGO_AUTO, 42, 2e-3f}, + fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator::kEQ, 
fil::leaf_value_t::FLOAT_SCALAR}, }; TEST_P(PredictSparseFilTest, Predict) { compare(); } @@ -565,72 +572,72 @@ INSTANTIATE_TEST_CASE_P(FilTests, PredictSparseFilTest, // global_bias, algo, seed, tolerance std::vector import_dense_inputs = { {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0, fil::algo_t::NAIVE, - 42, 2e-3f, tl::Operator::kLT}, + 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::FLOAT_SCALAR}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, - fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kLE}, + fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kLE, fil::leaf_value_t::INT_CLASS_LABEL}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::SIGMOID | fil::output_t::THRESHOLD), 0, 0, - fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kGT}, + fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kGT, fil::leaf_value_t::FLOAT_SCALAR}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::NAIVE, - 42, 2e-3f, tl::Operator::kGE}, + 42, 2e-3f, tl::Operator::kGE, fil::leaf_value_t::INT_CLASS_LABEL}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::THRESHOLD), 0, 0, - fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kLT}, + fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::FLOAT_SCALAR}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0, - fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kLE}, + fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kLE, fil::leaf_value_t::INT_CLASS_LABEL}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, - fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kGT}, + fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kGT, fil::leaf_value_t::INT_CLASS_LABEL}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::SIGMOID | fil::output_t::THRESHOLD), 0, 0, - fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kGE}, + fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kGE, fil::leaf_value_t::INT_CLASS_LABEL}, {20000, 50, 0.05, 8, 50, 0.05, 
fil::output_t::AVG, 0, 0, - fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kLT}, + fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::FLOAT_SCALAR}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::THRESHOLD), 0, 0, - fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kLE}, + fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kLE, fil::leaf_value_t::INT_CLASS_LABEL}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLT}, + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::FLOAT_SCALAR}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLT}, + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::INT_CLASS_LABEL}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLE}, + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLE, fil::leaf_value_t::INT_CLASS_LABEL}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLE}, + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLE, fil::leaf_value_t::FLOAT_SCALAR}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kGT}, + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kGT, fil::leaf_value_t::FLOAT_SCALAR}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kGT}, + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kGT, fil::leaf_value_t::INT_CLASS_LABEL}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kGE}, + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kGE, fil::leaf_value_t::INT_CLASS_LABEL}, {20000, 50, 0.05, 8, 
50, 0.05, fil::output_t::SIGMOID, 0, 0, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kGE}, + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kGE, fil::leaf_value_t::FLOAT_SCALAR}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::SIGMOID | fil::output_t::THRESHOLD), 0, 0, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLT}, + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::INT_CLASS_LABEL}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::SIGMOID | fil::output_t::THRESHOLD), 0, 0, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLE}, + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLE, fil::leaf_value_t::INT_CLASS_LABEL}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLT}, + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::INT_CLASS_LABEL}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLE}, + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLE, fil::leaf_value_t::FLOAT_SCALAR}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::THRESHOLD), 0, 0, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kGT}, + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kGT, fil::leaf_value_t::FLOAT_SCALAR}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::THRESHOLD), 0, 0, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kGE}, + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kGE, fil::leaf_value_t::FLOAT_SCALAR}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0.5, - fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kLT}, + fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::INT_CLASS_LABEL}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0.5, - 
fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLE}, + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLE, fil::leaf_value_t::INT_CLASS_LABEL}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0.5, fil::algo_t::NAIVE, - 42, 2e-3f, tl::Operator::kGT}, + 42, 2e-3f, tl::Operator::kGT, fil::leaf_value_t::INT_CLASS_LABEL}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::THRESHOLD), 1.0, 0.5, - fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kGE}, + fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kGE, fil::leaf_value_t::FLOAT_SCALAR}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, - fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator::kLE}, + fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator::kLE, fil::leaf_value_t::INT_CLASS_LABEL}, }; TEST_P(TreeliteDenseFilTest, Import) { compare(); } @@ -642,28 +649,28 @@ INSTANTIATE_TEST_CASE_P(FilTests, TreeliteDenseFilTest, // global_bias, algo, seed, tolerance std::vector import_sparse_inputs = { {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0, fil::algo_t::NAIVE, - 42, 2e-3f, tl::Operator::kLT}, + 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::INT_CLASS_LABEL}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, - fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kLE}, + fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kLE, fil::leaf_value_t::FLOAT_SCALAR}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::SIGMOID | fil::output_t::THRESHOLD), 0, 0, - fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kGT}, + fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kGT, fil::leaf_value_t::FLOAT_SCALAR}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::NAIVE, - 42, 2e-3f, tl::Operator::kGE}, + 42, 2e-3f, tl::Operator::kGE, fil::leaf_value_t::INT_CLASS_LABEL}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::THRESHOLD), 0, 0, - fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kLT}, + fil::algo_t::NAIVE, 42, 2e-3f, 
tl::Operator::kLT, fil::leaf_value_t::INT_CLASS_LABEL}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0.5, fil::algo_t::NAIVE, - 42, 2e-3f, tl::Operator::kLT}, + 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::INT_CLASS_LABEL}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0.5, - fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kLE}, + fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kLE, fil::leaf_value_t::FLOAT_SCALAR}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0.5, fil::algo_t::NAIVE, - 42, 2e-3f, tl::Operator::kGT}, + 42, 2e-3f, tl::Operator::kGT, fil::leaf_value_t::INT_CLASS_LABEL}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::THRESHOLD), 1.0, 0.5, - fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kGE}, + fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kGE, fil::leaf_value_t::FLOAT_SCALAR}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0, - fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator::kLT}, + fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::INT_CLASS_LABEL}, }; TEST_P(TreeliteSparseFilTest, Import) { compare(); } @@ -675,13 +682,13 @@ INSTANTIATE_TEST_CASE_P(FilTests, TreeliteSparseFilTest, // global_bias, algo, seed, tolerance std::vector import_auto_inputs = { {20000, 50, 0.05, 10, 50, 0.05, fil::output_t::RAW, 0, 0, - fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator::kLT}, + fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::FLOAT_SCALAR}, {20000, 50, 0.05, 15, 50, 0.05, fil::output_t::RAW, 0, 0, - fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator::kLT}, + fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::INT_CLASS_LABEL}, {20000, 50, 0.05, 19, 50, 0.05, fil::output_t::RAW, 0, 0, - fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator::kLT}, + fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::INT_CLASS_LABEL}, {20000, 50, 0.05, 19, 50, 0.05, fil::output_t::RAW, 0, 0, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, 
tl::Operator::kLT}, + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::FLOAT_SCALAR}, }; TEST_P(TreeliteAutoFilTest, Import) { compare(); } diff --git a/python/cuml/fil/fil.pyx b/python/cuml/fil/fil.pyx index 61e99a793f..ca4256dd12 100644 --- a/python/cuml/fil/fil.pyx +++ b/python/cuml/fil/fil.pyx @@ -323,6 +323,7 @@ cdef class ForestInference_impl(): treelite_params.threshold = threshold treelite_params.algo = self.get_algo(algo) treelite_params.storage_type = self.get_storage_type(storage_type) + treelite_params.leaf_payload = leaf_value_t.INT_CLASS_LABEL cdef cumlHandle* handle_ =\ self.handle.getHandle() From 740a51bbceb81aa7b4ef5a97b232a8c765a6be81 Mon Sep 17 00:00:00 2001 From: Levs Dolgovs Date: Tue, 25 Feb 2020 20:42:00 -0800 Subject: [PATCH 034/330] changelog --- CHANGELOG.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f7e5b626b0..c51af83327 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ # cuML 0.14.0 (Date TBD) ## New Features +- PR #1757: Add multi class inference in FIL for forests from cuML RF ## Improvements @@ -20,9 +21,6 @@ - PR #1709: Add `decision_function()` and `predict_proba()` for LogisticRegression - PR #1714: Add `print_env.sh` file to gather important environment details - PR #1750: LinearRegression CumlArray for configurable output -- PR #1767: Single GPU decomposition models configurable output -- PR #1778: Make cuML Handle picklable -- PR #1738: cuml.dask refactor beginning and dask array input option for OLS, Ridge and KMeans ## Improvements - PR #1644: Add `predict_proba()` for FIL binary classifier From a92d4d92bf3aa363ee48f0b92cfbcd1332689c7d Mon Sep 17 00:00:00 2001 From: Levs Dolgovs Date: Thu, 27 Feb 2020 16:25:02 -0800 Subject: [PATCH 035/330] addressed review comments on determining the treelite model type and style TODO: survey libraries to understand output conventions, e.g. 
class output for multi-class classification, whether other modes (class probabilities, multi-valued regression) are supported. --- cpp/include/cuml/fil/fil.h | 36 ++++++----- cpp/src/fil/common.cuh | 27 +++------ cpp/src/fil/fil.cu | 118 +++++++++++++++++++------------------ cpp/src/fil/infer.cu | 79 +++++++++++++------------ cpp/test/sg/fil_test.cu | 27 +++++---- python/cuml/fil/fil.pyx | 6 -- 6 files changed, 146 insertions(+), 147 deletions(-) diff --git a/cpp/include/cuml/fil/fil.h b/cpp/include/cuml/fil/fil.h index 52014b6998..3486495d2b 100644 --- a/cpp/include/cuml/fil/fil.h +++ b/cpp/include/cuml/fil/fil.h @@ -83,16 +83,19 @@ enum storage_type_t { SPARSE }; +/** val_t is the payload within a FIL leaf */ +union val_t { + /** threshold value for branch node or output value (e.g. class + probability or regression summand) for leaf node */ + float f; + /** class label or index of the float vector + (vector can be used for class probabilities or regression) */ + unsigned int idx; +}; + /** dense_node_t is a node in a densely-stored forest */ struct dense_node_t { - union Val { - /** threshold value for branch node or output value (e.g. 
class - probability or regression summand) for leaf node */ - float f; - /** class label or index of the float vector - vector can be used for class probabilities or regression */ - unsigned int idx; - } val; + union val_t val; int bits; }; @@ -108,7 +111,8 @@ struct sparse_node_t : dense_node_t, sparse_node_extra_data { : dense_node_t(dn), sparse_node_extra_data(ed) {} }; -enum leaf_value_t { +/** leaf_value_desc_t describes what the leaves in a FIL forest store (predict) */ +enum leaf_value_desc_t { /** storing a clas probability or regression summand */ FLOAT_SCALAR, /** storing a class label */ @@ -122,8 +126,7 @@ void dense_node_init(dense_node_t* n, float output, float thresh, int fid, /** dense_node_decode extracts individual members from node */ void dense_node_decode(const dense_node_t* node, float* output, float* thresh, - int* fid, bool* def_left, bool* is_leaf, - leaf_value_t leaf_payload_type); + int* fid, bool* def_left, bool* is_leaf); /** sparse_node_init initializes node from parameters */ void sparse_node_init(sparse_node_t* node, float output, float thresh, int fid, @@ -132,7 +135,7 @@ void sparse_node_init(sparse_node_t* node, float output, float thresh, int fid, /** sparse_node_decode extracts individual members from node */ void sparse_node_decode(const sparse_node_t* node, float* output, float* thresh, int* fid, bool* def_left, bool* is_leaf, - int* left_index, leaf_value_t leaf_payload_type); + int* left_index); struct forest; @@ -149,8 +152,8 @@ struct forest_params_t { int num_trees; // num_cols is the number of columns in the data int num_cols; - // TODO doc - leaf_value_t leaf_payload_type; + // leaf_payload_type determines what the leaves store (predict) + leaf_value_desc_t leaf_payload_type; // algo is the inference algorithm; // sparse forests do not distinguish between NAIVE and TREE_REORG algo_t algo; @@ -162,6 +165,9 @@ struct forest_params_t { // global_bias is added to the sum of tree predictions // (after averaging, if it is 
used, but before any further transformations) float global_bias; + // output_dim is how many values a single data row would yield. + // an exception: for two-class classification, it is 1 + int output_dim; }; /** treelite_params_t are parameters for importing treelite models */ @@ -176,8 +182,6 @@ struct treelite_params_t { float threshold; // storage_type indicates whether the forest should be imported as dense or sparse storage_type_t storage_type; - // TODO doc - leaf_value_t leaf_payload_type; }; /** init_dense uses params and nodes to initialize the dense forest stored in pf diff --git a/cpp/src/fil/common.cuh b/cpp/src/fil/common.cuh index 8134194da2..af109b2dd9 100644 --- a/cpp/src/fil/common.cuh +++ b/cpp/src/fil/common.cuh @@ -47,32 +47,23 @@ struct base_node : dense_node_t { static const int FID_MASK = (1 << 30) - 1; static const int DEF_LEFT_MASK = 1 << 30; static const int IS_LEAF_MASK = 1 << 31; - template - inline T output() const; + inline val_t output() const { return val; } __host__ __device__ float thresh() const { return val.f; } __host__ __device__ int fid() const { return bits & FID_MASK; } __host__ __device__ bool def_left() const { return bits & DEF_LEFT_MASK; } __host__ __device__ bool is_leaf() const { return bits & IS_LEAF_MASK; } base_node() = default; base_node(dense_node_t node) : dense_node_t(node) {} - base_node(float output_, float thresh, int fid, bool def_left, bool is_leaf) { + base_node(val_t output_, float thresh, int fid, bool def_left, bool is_leaf) { bits = (fid & FID_MASK) | (def_left ? DEF_LEFT_MASK : 0) | (is_leaf ? IS_LEAF_MASK : 0); - val.f = is_leaf ? 
output_ : thresh; + if (is_leaf) + val = output_; + else + val.f = thresh; } }; -template <> -__host__ __device__ inline unsigned int base_node::output() - const { - return val.idx; -} - -template <> -__host__ __device__ inline float base_node::output() const { - return val.f; -} - /** dense_node is a single node of a dense forest */ struct alignas(8) dense_node : base_node { dense_node() = default; @@ -91,7 +82,6 @@ struct dense_tree { return nodes_[i * node_pitch_]; } dense_node* nodes_ = nullptr; - float* class_probs_ = nullptr; int node_pitch_ = 0; }; @@ -108,7 +98,6 @@ struct dense_storage { return dense_tree(nodes_ + i * tree_stride_, node_pitch_); } dense_node* nodes_ = nullptr; - float* class_probs_ = nullptr; int num_trees_ = 0; int tree_stride_ = 0; int node_pitch_ = 0; @@ -119,7 +108,7 @@ struct alignas(16) sparse_node : base_node, sparse_node_extra_data { //__host__ __device__ sparse_node() : left_idx(0), base_node() {} sparse_node(sparse_node_t node) : base_node(node), sparse_node_extra_data(node) {} - sparse_node(float output_, float thresh, int fid, bool def_left, bool is_leaf, + sparse_node(val_t output_, float thresh, int fid, bool def_left, bool is_leaf, int left_index) : base_node(output_, thresh, fid, def_left, is_leaf), sparse_node_extra_data({.left_idx = left_index, .dummy = 0}) {} @@ -161,7 +150,7 @@ struct predict_params { // so far, only 1 or 2 is supported, and only used to output probabilities // from classifier models // TODO doc - leaf_value_t leaf_payload_type; + leaf_value_desc_t leaf_payload_type; // Data parameters. 
float* preds; diff --git a/cpp/src/fil/fil.cu b/cpp/src/fil/fil.cu index e874ac869a..18d8c569df 100644 --- a/cpp/src/fil/fil.cu +++ b/cpp/src/fil/fil.cu @@ -36,33 +36,22 @@ namespace fil { using namespace MLCommon; namespace tl = treelite; -void dense_node_init(dense_node_t* n, float output, float thresh, int fid, +void dense_node_init(dense_node_t* n, val_t output, float thresh, int fid, bool def_left, bool is_leaf) { *n = dense_node(output, thresh, fid, def_left, is_leaf); } -void dense_node_decode(const dense_node_t* n, float* output, float* thresh, - int* fid, bool* def_left, bool* is_leaf, - leaf_value_t leaf_payload_type) { +void dense_node_decode(const dense_node_t* n, union val_t* output, float* thresh, + int* fid, bool* def_left, bool* is_leaf) { dense_node dn(*n); - // TODO: shouldn't it output a NAN in case the value is not applicable (e.g. leaf vs not a leaf)? - switch (leaf_payload_type) { - case INT_CLASS_LABEL: - *output = dn.output(); - break; - case FLOAT_SCALAR: - *output = dn.output(); - break; - default: - ASSERT(false, "unknown leaf_payload_type"); - } + *output = dn.output(); *thresh = dn.thresh(); *fid = dn.fid(); *def_left = dn.def_left(); *is_leaf = dn.is_leaf(); } -void sparse_node_init(sparse_node_t* node, float output, float thresh, int fid, +void sparse_node_init(sparse_node_t* node, val_t output, float thresh, int fid, bool def_left, bool is_leaf, int left_index) { sparse_node n(output, thresh, fid, def_left, is_leaf, left_index); @@ -70,25 +59,11 @@ void sparse_node_init(sparse_node_t* node, float output, float thresh, int fid, } /** sparse_node_decode extracts individual members from node */ -void sparse_node_decode(const sparse_node_t* node, float* output, float* thresh, +void sparse_node_decode(const sparse_node_t* node, union val_t* output, float* thresh, int* fid, bool* def_left, bool* is_leaf, - int* left_index, leaf_value_t leaf_payload_type) { - sparse_node n(*node); - switch (leaf_payload_type) { - case INT_CLASS_LABEL: - 
*output = n.output(); - break; - case FLOAT_SCALAR: - *output = n.output(); - break; - default: - ASSERT(false, "unknown leaf_payload_type"); - } - *thresh = n.thresh(); - *fid = n.fid(); - *def_left = n.def_left(); - *is_leaf = n.is_leaf(); - *left_index = n.left_index(); + int* left_index) { + dense_node_decode(node, output, thresh, fid, def_left, is_leaf); + *left_index = sparse_node(*node).left_index(); } __host__ __device__ float sigmoid(float x) { return 1.0f / (1.0f + expf(-x)); } @@ -141,7 +116,6 @@ struct forest { threshold_ = params->threshold; global_bias_ = params->global_bias; leaf_payload_type_ = params->leaf_payload_type; - printf("%s line %d: leaf_payload_type %d\n", __FILE__, __LINE__, params->leaf_payload_type); init_max_shm(); } @@ -184,7 +158,7 @@ struct forest { output_t output_ = output_t::RAW; float threshold_ = 0.5; float global_bias_ = 0; - leaf_value_t leaf_payload_type_ = FLOAT_SCALAR; + leaf_value_desc_t leaf_payload_type_ = FLOAT_SCALAR; }; struct dense_forest : forest { @@ -300,8 +274,8 @@ void check_params(const forest_params_t* params, bool dense) { "algo should be ALGO_AUTO, NAIVE, TREE_REORG or BATCH_TREE_REORG"); } switch (params->leaf_payload_type) { - case leaf_value_t::FLOAT_SCALAR: - case leaf_value_t::INT_CLASS_LABEL: + case leaf_value_desc_t::FLOAT_SCALAR: + case leaf_value_desc_t::INT_CLASS_LABEL: break; default: ASSERT(false, @@ -389,15 +363,36 @@ void adjust_threshold(float* pthreshold, int* tl_left, int* tl_right, } } +/** if the vector consists of zeros and a single one, return the position +for the one (assumed class label). Else, return -1. +If the vector contains a NAN, return -1. 
*/ +int find_class_label_from_one_hot(float* vector, int len) { + bool found_label = false; + int out = -1; // in case all are 0.f + for(int i = 0; i < len; ++i) + if(vector[i] == 1.f) { + if(!found_label) + out = i; + else // more than one 1.f + return -1; + found_label = true; + } else if (vector[i] != 0.f) // NAN != 0.f + return -1; + return out; +} + template void tl2fil_leaf_payload(fil_node_t* fil_node, const tl::Tree::Node& tl_node, - leaf_value_t leaf_payload_type) { + leaf_value_desc_t leaf_payload_type) { switch (leaf_payload_type) { case INT_CLASS_LABEL: - fil_node->val.idx = tl_node.leaf_value(); + auto vec = tl_node.leaf_vector(); + fil_node->val.idx = find_class_label_from_one_hot(&vec[0], vec.size()); + ASSERT(fil_node->val.idx != -1, "a non-empty non-one-hot leaf vector"); break; case FLOAT_SCALAR: fil_node->val.f = tl_node.leaf_value(); + ASSERT(tl_node.leaf_vector().size() == 0, "some but not all treelite leaves have leaf_vector()"); break; default: ASSERT(false, "unknown leaf_payload_type"); @@ -406,11 +401,11 @@ void tl2fil_leaf_payload(fil_node_t* fil_node, const tl::Tree::Node& tl_node, void node2fil_dense(std::vector* pnodes, int root, int cur, const tl::Tree& tree, const tl::Tree::Node& node, - const treelite_params_t* tl_params) { + const leaf_value_desc_t leaf_payload_type) { if (node.is_leaf()) { dense_node_init(&(*pnodes)[root + cur], NAN, NAN, 0, false, true); tl2fil_leaf_payload(&(*pnodes)[root + cur], node, - tl_params->leaf_payload_type); + leaf_payload_type); return; } @@ -425,18 +420,17 @@ void node2fil_dense(std::vector* pnodes, int root, int cur, default_left, false); int left = 2 * cur + 1; node2fil_dense(pnodes, root, left, tree, tl_node_at(tree, tl_left), - tl_params); + leaf_payload_type); node2fil_dense(pnodes, root, left + 1, tree, tl_node_at(tree, tl_right), - tl_params); + leaf_payload_type); } void node2fil_sparse(std::vector* pnodes, int root, int cur, const tl::Tree& tree, const tl::Tree::Node& node, - const 
treelite_params_t* tl_params) { + const leaf_value_desc_t leaf_payload_type) { if (node.is_leaf()) { sparse_node_init(&(*pnodes)[root + cur], NAN, NAN, 0, false, true, 0); - tl2fil_leaf_payload(&(*pnodes)[root + cur], node, - tl_params->leaf_payload_type); + tl2fil_leaf_payload(&(*pnodes)[root + cur], node, leaf_payload_type); return; } @@ -461,23 +455,23 @@ void node2fil_sparse(std::vector* pnodes, int root, int cur, // init child nodes node2fil_sparse(pnodes, root, left, tree, tl_node_at(tree, tl_left), - tl_params); + leaf_payload_type); node2fil_sparse(pnodes, root, left + 1, tree, tl_node_at(tree, tl_right), - tl_params); + leaf_payload_type); } void tree2fil_dense(std::vector* pnodes, int root, - const tl::Tree& tree, const treelite_params_t* tl_params) { + const tl::Tree& tree, const leaf_value_desc_t leaf_payload_type) { node2fil_dense(pnodes, root, 0, tree, tl_node_at(tree, tree_root(tree)), - tl_params); + leaf_payload_type); } int tree2fil_sparse(std::vector* pnodes, const tl::Tree& tree, - const treelite_params_t* tl_params) { + const leaf_value_desc_t leaf_payload_type) { int root = pnodes->size(); pnodes->push_back(sparse_node_t()); node2fil_sparse(pnodes, root, 0, tree, tl_node_at(tree, tree_root(tree)), - tl_params); + leaf_payload_type); return root; } @@ -488,7 +482,17 @@ void tl2fil_common(forest_params_t* params, const tl::Model& model, // fill in forest-indendent params params->algo = tl_params->algo; params->threshold = tl_params->threshold; - params->leaf_payload_type = tl_params->leaf_payload_type; + + // assuming either all leaves use the .leaf_vector() or all leaves use .leaf_value() + auto tree = model.trees[0]; + auto vec = tl_node_at(tree, tree_root(tree)).leaf_vector(); + if(vec.size()) { + if(find_class_label_from_one_hot(&vec[0], vec.size()) != -1) + params->leaf_payload_type = INT_CLASS_LABEL; + else + ASSERT(false, "unexpected: non-empty non-one-hot leaf vector"); + } else + params->leaf_payload_type = FLOAT_SCALAR; // fill in 
forest-dependent params params->num_cols = model.num_feature; @@ -526,7 +530,7 @@ void tl2fil_dense(std::vector* pnodes, forest_params_t* params, pnodes->resize(num_nodes, dense_node_t{0, 0}); for (int i = 0; i < model.trees.size(); ++i) { tree2fil_dense(pnodes, i * tree_num_nodes(params->depth), model.trees[i], - tl_params); + params->leaf_payload_type); } } @@ -539,7 +543,8 @@ void tl2fil_sparse(std::vector* ptrees, std::vector* pnodes, // convert the nodes for (int i = 0; i < model.trees.size(); ++i) { - int root = tree2fil_sparse(pnodes, model.trees[i], tl_params); + int root = tree2fil_sparse(pnodes, model.trees[i], + params->leaf_payload_type); ptrees->push_back(root); } params->num_nodes = pnodes->size(); @@ -563,7 +568,6 @@ void init_sparse(const cumlHandle& h, forest_t* pf, const int* trees, void from_treelite(const cumlHandle& handle, forest_t* pforest, ModelHandle model, const treelite_params_t* tl_params) { - printf("%s line %d: leaf_payload_type %d\n", __FILE__, __LINE__, tl_params->leaf_payload_type); storage_type_t storage_type = tl_params->storage_type; // build dense trees by default const tl::Model& model_ref = *(tl::Model*)model; diff --git a/cpp/src/fil/infer.cu b/cpp/src/fil/infer.cu index 56acf25b11..ac5c6bcc03 100644 --- a/cpp/src/fil/infer.cu +++ b/cpp/src/fil/infer.cu @@ -40,8 +40,8 @@ struct vec { } }; -template -__device__ __forceinline__ vec infer_one_tree(tree_type tree, +template +__device__ __forceinline__ vec infer_one_tree(tree_type tree, float* sdata, int cols) { int curr[NITEMS]; @@ -61,15 +61,15 @@ __device__ __forceinline__ vec infer_one_tree(tree_type tree, curr[j] = n.left(curr[j]) + cond; } } while (mask != 0); - vec out; + vec out; #pragma unroll for (int j = 0; j < NITEMS; ++j) - out[j] = tree[curr[j]].base_node::output(); + out[j] = tree[curr[j]].output(); return out; } -template -__device__ __forceinline__ vec<1, TOUTPUT> infer_one_tree(tree_type tree, +template +__device__ __forceinline__ vec<1, output_type> 
infer_one_tree(tree_type tree, float* sdata, int cols) { int curr = 0; @@ -80,27 +80,19 @@ __device__ __forceinline__ vec<1, TOUTPUT> infer_one_tree(tree_type tree, bool cond = isnan(val) ? !n.def_left() : val >= n.thresh(); curr = n.left(curr) + cond; } - vec<1, TOUTPUT> out; - out[0] = tree[curr].base_node::output(); + vec<1, output_type> out; + out[0] = tree[curr].output(); return out; } -template -class AggregateTrees { - public: - __device__ __forceinline__ AggregateTrees(int num_output_classes, - void* smem_workspace); - __device__ __forceinline__ void accumulate(vec out); - __device__ __forceinline__ void finalize(float* out, int num_rows); -}; - -template -class AggregateTrees { +template +struct tree_aggregator_t { vec acc; int num_output_classes; - public: - __device__ __forceinline__ AggregateTrees(int num_output_classes_, void*) + __device__ __forceinline__ tree_aggregator_t(int num_output_classes_, void*) : num_output_classes(num_output_classes_) { // TODO: even if num_output_classes == 2, in regression, this needs to change #pragma unroll @@ -125,18 +117,17 @@ class AggregateTrees { }; template -class AggregateTrees { - typedef unsigned int VoteCount; +struct tree_aggregator_t { + typedef unsigned int vote_count_t; // can switch to unsigned short to save shared memory // provided atomicInc(short*) simulated with atomicAdd with appropriate shifts - VoteCount* votes; + vote_count_t* votes; int num_output_classes; - public: - __device__ __forceinline__ AggregateTrees(int num_output_classes_, + __device__ __forceinline__ tree_aggregator_t(int num_output_classes_, void* shared_workspace) - : num_output_classes(num_output_classes_) { - votes = (VoteCount*)shared_workspace; + : votes(shared_workspace), num_output_classes(num_output_classes_) { + for (int c = threadIdx.x; c < num_output_classes; c += FIL_TPB * NITEMS) #pragma unroll for (int i = 0; i < NITEMS; ++i) votes[i * num_output_classes + c] = 0; @@ -145,7 +136,7 @@ class AggregateTrees { __device__ 
__forceinline__ void accumulate(vec out) { #pragma unroll for (int i = 0; i < NITEMS; ++i) - atomicInc(votes + i * num_output_classes + out[i], UINT_MAX); + atomicAdd(votes + i * num_output_classes + out[i], 1); } __device__ __forceinline__ void finalize(float* out, int num_rows) { __syncthreads(); @@ -161,7 +152,7 @@ class AggregateTrees { } }; -template __global__ void infer_k(storage_type forest, predict_params params) { // cache the row for all threads to reuse @@ -176,7 +167,7 @@ __global__ void infer_k(storage_type forest, predict_params params) { } } - AggregateTrees acc( + tree_aggregator_t acc( params.num_output_classes, sdata + params.num_cols * NITEMS); __syncthreads(); // for both row cache init and acc init @@ -185,12 +176,12 @@ __global__ void infer_k(storage_type forest, predict_params params) { // one block works on NITEMS rows and the whole forest for (int j = threadIdx.x; j < forest.num_trees(); j += blockDim.x) { acc.accumulate( - infer_one_tree(forest[j], sdata, params.num_cols)); + infer_one_tree(forest[j], sdata, params.num_cols)); } acc.finalize(params.preds, params.num_rows); } -template void infer_k_launcher(storage_type forest, predict_params params, cudaStream_t stream) { @@ -205,22 +196,33 @@ void infer_k_launcher(storage_type forest, predict_params params, } num_items = std::min(num_items, params.max_items); int num_blocks = ceildiv(int(params.num_rows), num_items); - int shm_sz = num_items * sizeof(float) * params.num_cols; + int shm_sz; + switch(leaf_payload_type) { + case INT_CLASS_LABEL: + shm_sz = num_items * sizeof(int) * params.num_output_classes; + break; + case FLOAT_SCALAR: + shm_sz = num_items * sizeof(float) * params.num_cols; + break; + default: + ASSERT(false, "internal error: unknown leaf_payload_type"); + } + if (leaf_payload_type == INT_CLASS_LABEL) switch (num_items) { case 1: - infer_k<1, leaf_payload_type, TOUTPUT> + infer_k<1, leaf_payload_type, output_type> <<>>(forest, params); break; case 2: - infer_k<2, 
leaf_payload_type, TOUTPUT> + infer_k<2, leaf_payload_type, output_type> <<>>(forest, params); break; case 3: - infer_k<3, leaf_payload_type, TOUTPUT> + infer_k<3, leaf_payload_type, output_type> <<>>(forest, params); break; case 4: - infer_k<4, leaf_payload_type, TOUTPUT> + infer_k<4, leaf_payload_type, output_type> <<>>(forest, params); break; default: @@ -243,7 +245,6 @@ void infer(storage_type forest, predict_params params, cudaStream_t stream) { forest, params, stream); break; default: - printf("%s line %d: leaf_payload_type %d\n", __FILE__, __LINE__, params.leaf_payload_type); ASSERT(false, "unknown leaf_payload_type"); } } diff --git a/cpp/test/sg/fil_test.cu b/cpp/test/sg/fil_test.cu index 3d9162380a..410683feb1 100644 --- a/cpp/test/sg/fil_test.cu +++ b/cpp/test/sg/fil_test.cu @@ -154,7 +154,15 @@ class BaseFilTest : public testing::TestWithParam { // initialize nodes nodes.resize(num_nodes); for (size_t i = 0; i < num_nodes; ++i) { - fil::dense_node_init(&nodes[i], weights_h[i], thresholds_h[i], fids_h[i], + val_t w; + switch (ps.leaf_payload_type) { + case INT_CLASS_LABEL: + w.idx = (int)(weights_h[i] + 1.0f) % 2; // [0, 1] + break; + case FLOAT_SCALAR: + w.f = weights_h[i]; + } + fil::dense_node_init(&nodes[i], w, thresholds_h[i], fids_h[i], def_lefts_h[i], is_leafs_h[i]); } @@ -298,7 +306,6 @@ class PredictDenseFilTest : public BaseFilTest { fil_ps.threshold = ps.threshold; fil_ps.global_bias = ps.global_bias; fil_ps.leaf_payload_type = ps.leaf_payload_type; - printf("%s line %d: leaf_payload_type %d\n", __FILE__, __LINE__, fil_ps.leaf_payload_type); fil::init_dense(handle, pforest, nodes.data(), &fil_ps); } }; @@ -307,11 +314,12 @@ class PredictSparseFilTest : public BaseFilTest { protected: void dense2sparse_node(const fil::dense_node_t* dense_root, int i_dense, int i_sparse_root, int i_sparse) { - float output, threshold; + float threshold; + val_t output; int feature; bool def_left, is_leaf; dense_node_decode(&dense_root[i_dense], &output, 
&threshold, &feature, - &def_left, &is_leaf, fil::leaf_value_t::FLOAT_SCALAR); + &def_left, &is_leaf); if (is_leaf) { // leaf sparse node sparse_node_init(&sparse_nodes[i_sparse], output, threshold, feature, @@ -372,13 +380,14 @@ class TreeliteFilTest : public BaseFilTest { int key = (*pkey)++; TL_CPP_CHECK(builder->CreateNode(key)); int feature; - float threshold, output; + float threshold; + val_t output; bool is_leaf, default_left; fil::dense_node_decode(&nodes[node], &output, &threshold, &feature, - &default_left, &is_leaf, - fil::leaf_value_t::FLOAT_SCALAR); + &default_left, &is_leaf); if (is_leaf) { - TL_CPP_CHECK(builder->SetLeafNode(key, output)); + // default is fil::FLOAT_SCALAR + TL_CPP_CHECK(builder->SetLeafNode(key, output.f)); } else { int left = root + 2 * (node - root) + 1; int right = root + 2 * (node - root) + 2; @@ -449,8 +458,6 @@ class TreeliteFilTest : public BaseFilTest { params.threshold = ps.threshold; params.output_class = (ps.output & fil::output_t::THRESHOLD) != 0; params.storage_type = storage_type; - params.leaf_payload_type = ps.leaf_payload_type; - printf("%s line %d: leaf_payload_type %d\n", __FILE__, __LINE__, params.leaf_payload_type); fil::from_treelite(handle, pforest, (ModelHandle)model.get(), ¶ms); CUDA_CHECK(cudaStreamSynchronize(stream)); } diff --git a/python/cuml/fil/fil.pyx b/python/cuml/fil/fil.pyx index ca4256dd12..a488a25024 100644 --- a/python/cuml/fil/fil.pyx +++ b/python/cuml/fil/fil.pyx @@ -165,10 +165,6 @@ cdef extern from "cuml/fil/fil.h" namespace "ML::fil": DENSE, SPARSE - cdef enum leaf_value_t: - FLOAT_SCALAR, - INT_CLASS_LABEL - cdef struct forest: pass @@ -287,7 +283,6 @@ cdef class ForestInference_impl(): treelite_params.threshold = threshold treelite_params.algo = self.get_algo(algo) treelite_params.storage_type = self.get_storage_type(storage_type) - treelite_params.leaf_payload = leaf_value_t.FLOAT_SCALAR self.forest_data = NULL cdef cumlHandle* handle_ =\ @@ -323,7 +318,6 @@ cdef class 
ForestInference_impl(): treelite_params.threshold = threshold treelite_params.algo = self.get_algo(algo) treelite_params.storage_type = self.get_storage_type(storage_type) - treelite_params.leaf_payload = leaf_value_t.INT_CLASS_LABEL cdef cumlHandle* handle_ =\ self.handle.getHandle() From be8eea8ba0d4088c6aeafa6a37805f43000a757c Mon Sep 17 00:00:00 2001 From: Levs Dolgovs Date: Mon, 2 Mar 2020 18:00:53 -0800 Subject: [PATCH 036/330] added INT_CLASS_LABEL transform_k changes, can now predict class label instead of just probabilities --- cpp/include/cuml/fil/fil.h | 8 +++--- cpp/src/fil/fil.cu | 23 ++++++++++++----- cpp/src/fil/infer.cu | 51 +++++++++++++++++++++++++++----------- 3 files changed, 58 insertions(+), 24 deletions(-) diff --git a/cpp/include/cuml/fil/fil.h b/cpp/include/cuml/fil/fil.h index 3486495d2b..f13a239be9 100644 --- a/cpp/include/cuml/fil/fil.h +++ b/cpp/include/cuml/fil/fil.h @@ -165,9 +165,11 @@ struct forest_params_t { // global_bias is added to the sum of tree predictions // (after averaging, if it is used, but before any further transformations) float global_bias; - // output_dim is how many values a single data row would yield. - // an exception: for two-class classification, it is 1 - int output_dim; + // prediction_dim determines the class probability prediction shape. 
+ // also affects intermediate output in classification + // currently, multi-valued (vector) regression not supported due to model + // storage/layout restrictions + int num_classes; }; /** treelite_params_t are parameters for importing treelite models */ diff --git a/cpp/src/fil/fil.cu b/cpp/src/fil/fil.cu index 18d8c569df..af22b04b33 100644 --- a/cpp/src/fil/fil.cu +++ b/cpp/src/fil/fil.cu @@ -75,11 +75,13 @@ __host__ __device__ float sigmoid(float x) { return 1.0f / (1.0f + expf(-x)); } and fills in the converse probability */ __global__ void transform_k(float* preds, size_t n, output_t output, float inv_num_trees, float threshold, - float global_bias, bool predict_proba) { + float global_bias, bool predict_proba, + leaf_value_desc_t leaf_payload_type) { size_t i = threadIdx.x + size_t(blockIdx.x) * blockDim.x; if (i >= n) return; - float result = preds[predict_proba ? i * 2 : i]; + bool complement_proba = predict_proba && (leaf_payload_type == FLOAT_SCALAR); + float result = preds[complement_proba ? i * 2 : i]; if ((output & output_t::AVG) != 0) result *= inv_num_trees; result += global_bias; if ((output & output_t::SIGMOID) != 0) result = sigmoid(result); @@ -88,7 +90,7 @@ __global__ void transform_k(float* preds, size_t n, output_t output, } // sklearn outputs numpy array in 'C' order, with the number of classes being last dimension // that is also the default order, so we should use the same one - if (predict_proba) { + if (complement_proba) { preds[i * 2] = 1.f - result; preds[i * 2 + 1] = result; } else @@ -116,6 +118,7 @@ struct forest { threshold_ = params->threshold; global_bias_ = params->global_bias; leaf_payload_type_ = params->leaf_payload_type; + num_output_classes_ = params->num_output_classes; init_max_shm(); } @@ -131,7 +134,9 @@ struct forest { params.data = data; params.num_rows = num_rows; params.max_shm = max_shm_; - params.num_output_classes = predict_proba ? 
2 : 1; + params.num_output_classes = + ((num_output_classes > 2) || (leaf_payload_type_ == INT_CLASS_LABEL)) ? num_output_classes_ : + (predict_proba ? 2 : 1); params.leaf_payload_type = leaf_payload_type_; // Predict using the forest. @@ -140,9 +145,14 @@ struct forest { // Transform the output if necessary. if (output_ != output_t::RAW || global_bias_ != 0.0f || predict_proba) { + auto output = output_; + if (predict_proba && (leaf_payload_type == INT_CLASS_LABEL)) + // because infer(params, stream) will write vote counts + // instead of probabilities + output |= output_t::AVG; transform_k<<>>( - preds, num_rows, output_, num_trees_ > 0 ? (1.0f / num_trees_) : 1.0f, - threshold_, global_bias_, predict_proba); + preds, num_rows, output, num_trees_ > 0 ? (1.0f / num_trees_) : 1.0f, + threshold_, global_bias_, predict_proba, params.leaf_payload_type); CUDA_CHECK(cudaPeekAtLastError()); } } @@ -159,6 +169,7 @@ struct forest { float threshold_ = 0.5; float global_bias_ = 0; leaf_value_desc_t leaf_payload_type_ = FLOAT_SCALAR; + int num_output_classes_ = INT_MAX; }; struct dense_forest : forest { diff --git a/cpp/src/fil/infer.cu b/cpp/src/fil/infer.cu index ac5c6bcc03..78846c551a 100644 --- a/cpp/src/fil/infer.cu +++ b/cpp/src/fil/infer.cu @@ -26,6 +26,7 @@ using namespace MLCommon; template struct vec { T data[N]; + __host__ __device__ inline vec() = default; // zeros for numerical member vars __host__ __device__ T& operator[](int i) { return data[i]; } __host__ __device__ T operator[](int i) const { return data[i]; } friend __host__ __device__ void operator+=(vec& a, const vec& b) { @@ -95,8 +96,6 @@ struct tree_aggregator_t { __device__ __forceinline__ tree_aggregator_t(int num_output_classes_, void*) : num_output_classes(num_output_classes_) { // TODO: even if num_output_classes == 2, in regression, this needs to change -#pragma unroll - for (int i = 0; i < NITEMS; ++i) acc[i] = 0.0f; } __device__ __forceinline__ void accumulate(vec out) { acc += out; @@ -118,6 
+117,7 @@ struct tree_aggregator_t { template struct tree_aggregator_t { + typedef unsigned int class_label_t; typedef unsigned int vote_count_t; // can switch to unsigned short to save shared memory // provided atomicInc(short*) simulated with atomicAdd with appropriate shifts @@ -130,24 +130,41 @@ struct tree_aggregator_t { for (int c = threadIdx.x; c < num_output_classes; c += FIL_TPB * NITEMS) #pragma unroll - for (int i = 0; i < NITEMS; ++i) votes[i * num_output_classes + c] = 0; + for (int i = 0; i < NITEMS; ++i) + votes[c * NITEMS + i] = 0; //__syncthreads(); // happening outside } - __device__ __forceinline__ void accumulate(vec out) { + __device__ __forceinline__ void accumulate(vec out) { #pragma unroll for (int i = 0; i < NITEMS; ++i) - atomicAdd(votes + i * num_output_classes + out[i], 1); + atomicAdd(votes + out[i] * NITEMS + i, 1); } __device__ __forceinline__ void finalize(float* out, int num_rows) { __syncthreads(); - if (threadIdx.x == 0) { - for (int i = 0; i < NITEMS; ++i) { - int row = blockIdx.x * NITEMS + i; - if (row < num_rows) - for (int c = 0; c < num_output_classes; ++c) - out[row * num_output_classes + c] = - votes[i * num_output_classes + c]; - } + int item = threadIdx.x; + int row = blockIdx.x * NITEMS + item; + if ((item < NITEMS) && (row < num_rows)) { +#pragma unroll + for (int c = 0; c < num_output_classes; ++c) + out[row * num_output_classes + c] = + votes[c * NITEMS + item]; + } + } + // using this when predicting a single class label, as opposed to sparse class vector + // or class probabilities or regression + __device__ __forceinline__ void finalize_class_label(float* out, int num_rows) { + __syncthreads(); + int item = threadIdx.x; + int row = blockIdx.x * NITEMS + item; + if ((item < NITEMS) && (row < num_rows)) { + vote_count_t max_votes = 0; + class_label_t best_class = 0; + for (int c = 0; c < num_output_classes; ++c) + if(votes[c * NITEMS + item] > max_votes) { + max_votes = votes[c * NITEMS + item]; + best_class = c; + 
} + out[row] = best_class; } } }; @@ -178,7 +195,12 @@ __global__ void infer_k(storage_type forest, predict_params params) { acc.accumulate( infer_one_tree(forest[j], sdata, params.num_cols)); } - acc.finalize(params.preds, params.num_rows); + // compute most probable class. in cuML RF, output is class label, + // hence, no-predicted class edge case doesn't apply + if ((leaf_payload_type == INT_CLASS_LABEL) && (!params.predict_proba)) + acc.finalize_class_label(params.preds, params.num_rows); + else + acc.finalize (params.preds, params.num_rows); } template From 9279f8705c85c448a1bb16a3d2e8cb7824177847 Mon Sep 17 00:00:00 2001 From: Levs Dolgovs Date: Mon, 2 Mar 2020 18:32:29 -0800 Subject: [PATCH 037/330] fix some compiler issues and formatting --- cpp/include/cuml/fil/fil.h | 8 +- cpp/src/fil/common.cuh | 7 +- cpp/src/fil/fil.cu | 84 +++++++------ cpp/src/fil/infer.cu | 60 +++++----- cpp/test/sg/fil_test.cu | 237 ++++++++++++++++++++++++------------- 5 files changed, 237 insertions(+), 159 deletions(-) diff --git a/cpp/include/cuml/fil/fil.h b/cpp/include/cuml/fil/fil.h index f13a239be9..3b9fe42396 100644 --- a/cpp/include/cuml/fil/fil.h +++ b/cpp/include/cuml/fil/fil.h @@ -121,19 +121,19 @@ enum leaf_value_desc_t { }; /** dense_node_init initializes node from paramters */ -void dense_node_init(dense_node_t* n, float output, float thresh, int fid, +void dense_node_init(dense_node_t* n, val_t output, float thresh, int fid, bool def_left, bool is_leaf); /** dense_node_decode extracts individual members from node */ -void dense_node_decode(const dense_node_t* node, float* output, float* thresh, +void dense_node_decode(const dense_node_t* node, val_t* output, float* thresh, int* fid, bool* def_left, bool* is_leaf); /** sparse_node_init initializes node from parameters */ -void sparse_node_init(sparse_node_t* node, float output, float thresh, int fid, +void sparse_node_init(sparse_node_t* node, val_t output, float thresh, int fid, bool def_left, bool is_leaf, int 
left_index); /** sparse_node_decode extracts individual members from node */ -void sparse_node_decode(const sparse_node_t* node, float* output, float* thresh, +void sparse_node_decode(const sparse_node_t* node, val_t* output, float* thresh, int* fid, bool* def_left, bool* is_leaf, int* left_index); diff --git a/cpp/src/fil/common.cuh b/cpp/src/fil/common.cuh index af109b2dd9..f947fd5d5d 100644 --- a/cpp/src/fil/common.cuh +++ b/cpp/src/fil/common.cuh @@ -47,7 +47,7 @@ struct base_node : dense_node_t { static const int FID_MASK = (1 << 30) - 1; static const int DEF_LEFT_MASK = 1 << 30; static const int IS_LEAF_MASK = 1 << 31; - inline val_t output() const { return val; } + __host__ __device__ val_t output() const { return val; } __host__ __device__ float thresh() const { return val.f; } __host__ __device__ int fid() const { return bits & FID_MASK; } __host__ __device__ bool def_left() const { return bits & DEF_LEFT_MASK; } @@ -68,7 +68,7 @@ struct base_node : dense_node_t { struct alignas(8) dense_node : base_node { dense_node() = default; dense_node(dense_node_t node) : base_node(node) {} - dense_node(float output_, float thresh, int fid, bool def_left, bool is_leaf) + dense_node(val_t output_, float thresh, int fid, bool def_left, bool is_leaf) : base_node(output_, thresh, fid, def_left, is_leaf) {} /** index of the left child, where curr is the index of the current node */ __host__ __device__ int left(int curr) const { return 2 * curr + 1; } @@ -147,10 +147,9 @@ struct predict_params { algo_t algo; int max_items; // only set and used by infer() int num_output_classes; - // so far, only 1 or 2 is supported, and only used to output probabilities - // from classifier models // TODO doc leaf_value_desc_t leaf_payload_type; + bool predict_proba; // Data parameters. 
float* preds; diff --git a/cpp/src/fil/fil.cu b/cpp/src/fil/fil.cu index af22b04b33..06e6e8b9fd 100644 --- a/cpp/src/fil/fil.cu +++ b/cpp/src/fil/fil.cu @@ -41,8 +41,8 @@ void dense_node_init(dense_node_t* n, val_t output, float thresh, int fid, *n = dense_node(output, thresh, fid, def_left, is_leaf); } -void dense_node_decode(const dense_node_t* n, union val_t* output, float* thresh, - int* fid, bool* def_left, bool* is_leaf) { +void dense_node_decode(const dense_node_t* n, union val_t* output, + float* thresh, int* fid, bool* def_left, bool* is_leaf) { dense_node dn(*n); *output = dn.output(); *thresh = dn.thresh(); @@ -59,8 +59,8 @@ void sparse_node_init(sparse_node_t* node, val_t output, float thresh, int fid, } /** sparse_node_decode extracts individual members from node */ -void sparse_node_decode(const sparse_node_t* node, union val_t* output, float* thresh, - int* fid, bool* def_left, bool* is_leaf, +void sparse_node_decode(const sparse_node_t* node, union val_t* output, + float* thresh, int* fid, bool* def_left, bool* is_leaf, int* left_index) { dense_node_decode(node, output, thresh, fid, def_left, is_leaf); *left_index = sparse_node(*node).left_index(); @@ -76,11 +76,10 @@ __host__ __device__ float sigmoid(float x) { return 1.0f / (1.0f + expf(-x)); } __global__ void transform_k(float* preds, size_t n, output_t output, float inv_num_trees, float threshold, float global_bias, bool predict_proba, - leaf_value_desc_t leaf_payload_type) { + bool complement_proba) { size_t i = threadIdx.x + size_t(blockIdx.x) * blockDim.x; if (i >= n) return; - bool complement_proba = predict_proba && (leaf_payload_type == FLOAT_SCALAR); float result = preds[complement_proba ? 
i * 2 : i]; if ((output & output_t::AVG) != 0) result *= inv_num_trees; result += global_bias; @@ -118,7 +117,7 @@ struct forest { threshold_ = params->threshold; global_bias_ = params->global_bias; leaf_payload_type_ = params->leaf_payload_type; - num_output_classes_ = params->num_output_classes; + num_output_classes_ = params->num_classes; init_max_shm(); } @@ -134,10 +133,12 @@ struct forest { params.data = data; params.num_rows = num_rows; params.max_shm = max_shm_; - params.num_output_classes = - ((num_output_classes > 2) || (leaf_payload_type_ == INT_CLASS_LABEL)) ? num_output_classes_ : - (predict_proba ? 2 : 1); + params.num_output_classes = + ((num_output_classes_ > 2) || (leaf_payload_type_ == INT_CLASS_LABEL)) + ? num_output_classes_ + : (predict_proba ? 2 : 1); params.leaf_payload_type = leaf_payload_type_; + params.predict_proba = predict_proba; // Predict using the forest. cudaStream_t stream = h.getStream(); @@ -146,13 +147,16 @@ struct forest { // Transform the output if necessary. if (output_ != output_t::RAW || global_bias_ != 0.0f || predict_proba) { auto output = output_; - if (predict_proba && (leaf_payload_type == INT_CLASS_LABEL)) + if (predict_proba && (leaf_payload_type_ == INT_CLASS_LABEL)) // because infer(params, stream) will write vote counts // instead of probabilities - output |= output_t::AVG; + output = output_t(output | output_t::AVG); + bool complement_proba = + predict_proba && (leaf_payload_type_ == FLOAT_SCALAR); + transform_k<<>>( preds, num_rows, output, num_trees_ > 0 ? (1.0f / num_trees_) : 1.0f, - threshold_, global_bias_, predict_proba, params.leaf_payload_type); + threshold_, global_bias_, predict_proba, complement_proba); CUDA_CHECK(cudaPeekAtLastError()); } } @@ -377,17 +381,17 @@ void adjust_threshold(float* pthreshold, int* tl_left, int* tl_right, /** if the vector consists of zeros and a single one, return the position for the one (assumed class label). Else, return -1. If the vector contains a NAN, return -1. 
*/ -int find_class_label_from_one_hot(float* vector, int len) { +int find_class_label_from_one_hot(tl::tl_float* vector, int len) { bool found_label = false; - int out = -1; // in case all are 0.f - for(int i = 0; i < len; ++i) - if(vector[i] == 1.f) { - if(!found_label) + int out = -1; // in case all are 0.f + for (int i = 0; i < len; ++i) + if (vector[i] == 1.f) { + if (!found_label) out = i; - else // more than one 1.f + else // more than one 1.f return -1; found_label = true; - } else if (vector[i] != 0.f) // NAN != 0.f + } else if (vector[i] != 0.f) // NAN != 0.f return -1; return out; } @@ -395,15 +399,17 @@ int find_class_label_from_one_hot(float* vector, int len) { template void tl2fil_leaf_payload(fil_node_t* fil_node, const tl::Tree::Node& tl_node, leaf_value_desc_t leaf_payload_type) { + auto vec = tl_node.leaf_vector(); switch (leaf_payload_type) { case INT_CLASS_LABEL: - auto vec = tl_node.leaf_vector(); fil_node->val.idx = find_class_label_from_one_hot(&vec[0], vec.size()); - ASSERT(fil_node->val.idx != -1, "a non-empty non-one-hot leaf vector"); + ASSERT(fil_node->val.idx != (unsigned)-1, + "a non-empty non-one-hot leaf vector"); break; case FLOAT_SCALAR: fil_node->val.f = tl_node.leaf_value(); - ASSERT(tl_node.leaf_vector().size() == 0, "some but not all treelite leaves have leaf_vector()"); + ASSERT(tl_node.leaf_vector().size() == 0, + "some but not all treelite leaves have leaf_vector()"); break; default: ASSERT(false, "unknown leaf_payload_type"); @@ -414,9 +420,9 @@ void node2fil_dense(std::vector* pnodes, int root, int cur, const tl::Tree& tree, const tl::Tree::Node& node, const leaf_value_desc_t leaf_payload_type) { if (node.is_leaf()) { - dense_node_init(&(*pnodes)[root + cur], NAN, NAN, 0, false, true); - tl2fil_leaf_payload(&(*pnodes)[root + cur], node, - leaf_payload_type); + dense_node_init(&(*pnodes)[root + cur], val_t{.f = NAN}, NAN, 0, false, + true); + tl2fil_leaf_payload(&(*pnodes)[root + cur], node, leaf_payload_type); return; } @@ 
-427,8 +433,8 @@ void node2fil_dense(std::vector* pnodes, int root, int cur, bool default_left = node.default_left(); float threshold = node.threshold(); adjust_threshold(&threshold, &tl_left, &tl_right, &default_left, node); - dense_node_init(&(*pnodes)[root + cur], 0, threshold, node.split_index(), - default_left, false); + dense_node_init(&(*pnodes)[root + cur], val_t{.f = 0}, threshold, + node.split_index(), default_left, false); int left = 2 * cur + 1; node2fil_dense(pnodes, root, left, tree, tl_node_at(tree, tl_left), leaf_payload_type); @@ -440,7 +446,8 @@ void node2fil_sparse(std::vector* pnodes, int root, int cur, const tl::Tree& tree, const tl::Tree::Node& node, const leaf_value_desc_t leaf_payload_type) { if (node.is_leaf()) { - sparse_node_init(&(*pnodes)[root + cur], NAN, NAN, 0, false, true, 0); + sparse_node_init(&(*pnodes)[root + cur], val_t{.f = NAN}, NAN, 0, false, + true, 0); tl2fil_leaf_payload(&(*pnodes)[root + cur], node, leaf_payload_type); return; } @@ -461,8 +468,8 @@ void node2fil_sparse(std::vector* pnodes, int root, int cur, int left = pnodes->size() - root; pnodes->push_back(sparse_node_t()); pnodes->push_back(sparse_node_t()); - sparse_node_init(&(*pnodes)[root + cur], 0, threshold, node.split_index(), - default_left, false, left); + sparse_node_init(&(*pnodes)[root + cur], val_t{.f = 0}, threshold, + node.split_index(), default_left, false, left); // init child nodes node2fil_sparse(pnodes, root, left, tree, tl_node_at(tree, tl_left), @@ -472,7 +479,8 @@ void node2fil_sparse(std::vector* pnodes, int root, int cur, } void tree2fil_dense(std::vector* pnodes, int root, - const tl::Tree& tree, const leaf_value_desc_t leaf_payload_type) { + const tl::Tree& tree, + const leaf_value_desc_t leaf_payload_type) { node2fil_dense(pnodes, root, 0, tree, tl_node_at(tree, tree_root(tree)), leaf_payload_type); } @@ -493,17 +501,17 @@ void tl2fil_common(forest_params_t* params, const tl::Model& model, // fill in forest-indendent params params->algo = 
tl_params->algo; params->threshold = tl_params->threshold; - + // assuming either all leaves use the .leaf_vector() or all leaves use .leaf_value() auto tree = model.trees[0]; auto vec = tl_node_at(tree, tree_root(tree)).leaf_vector(); - if(vec.size()) { - if(find_class_label_from_one_hot(&vec[0], vec.size()) != -1) + if (vec.size()) { + if (find_class_label_from_one_hot(&vec[0], vec.size()) != -1) params->leaf_payload_type = INT_CLASS_LABEL; else ASSERT(false, "unexpected: non-empty non-one-hot leaf vector"); } else - params->leaf_payload_type = FLOAT_SCALAR; + params->leaf_payload_type = FLOAT_SCALAR; // fill in forest-dependent params params->num_cols = model.num_feature; @@ -554,8 +562,8 @@ void tl2fil_sparse(std::vector* ptrees, std::vector* pnodes, // convert the nodes for (int i = 0; i < model.trees.size(); ++i) { - int root = tree2fil_sparse(pnodes, model.trees[i], - params->leaf_payload_type); + int root = + tree2fil_sparse(pnodes, model.trees[i], params->leaf_payload_type); ptrees->push_back(root); } params->num_nodes = pnodes->size(); diff --git a/cpp/src/fil/infer.cu b/cpp/src/fil/infer.cu index 78846c551a..7d203e8dc1 100644 --- a/cpp/src/fil/infer.cu +++ b/cpp/src/fil/infer.cu @@ -26,7 +26,7 @@ using namespace MLCommon; template struct vec { T data[N]; - __host__ __device__ inline vec() = default; // zeros for numerical member vars + inline vec() = default; // zeros for numerical member vars __host__ __device__ T& operator[](int i) { return data[i]; } __host__ __device__ T operator[](int i) const { return data[i]; } friend __host__ __device__ void operator+=(vec& a, const vec& b) { @@ -42,9 +42,8 @@ struct vec { }; template -__device__ __forceinline__ vec infer_one_tree(tree_type tree, - float* sdata, - int cols) { +__device__ __forceinline__ vec infer_one_tree( + tree_type tree, float* sdata, int cols) { int curr[NITEMS]; int mask = (1 << NITEMS) - 1; // all active for (int j = 0; j < NITEMS; ++j) curr[j] = 0; @@ -64,15 +63,14 @@ __device__ 
__forceinline__ vec infer_one_tree(tree_type tre } while (mask != 0); vec out; #pragma unroll - for (int j = 0; j < NITEMS; ++j) - out[j] = tree[curr[j]].output(); + for (int j = 0; j < NITEMS; ++j) out[j] = tree[curr[j]].output(); return out; } template __device__ __forceinline__ vec<1, output_type> infer_one_tree(tree_type tree, - float* sdata, - int cols) { + float* sdata, + int cols) { int curr = 0; for (;;) { auto n = tree[curr]; @@ -86,19 +84,20 @@ __device__ __forceinline__ vec<1, output_type> infer_one_tree(tree_type tree, return out; } -template +template // = float> struct tree_aggregator_t { vec acc; int num_output_classes; __device__ __forceinline__ tree_aggregator_t(int num_output_classes_, void*) : num_output_classes(num_output_classes_) { -// TODO: even if num_output_classes == 2, in regression, this needs to change + // TODO: even if num_output_classes == 2, in regression, this needs to change } - __device__ __forceinline__ void accumulate(vec out) { - acc += out; + __device__ __forceinline__ void accumulate(vec out) { +#pragma unroll + for (int i = 0; i < NITEMS; ++i) acc[i] += out[i].f; } __device__ __forceinline__ void finalize(float* out, int num_rows) { using BlockReduce = cub::BlockReduce, FIL_TPB>; @@ -113,6 +112,10 @@ struct tree_aggregator_t { } } } + __device__ __forceinline__ void finalize_class_label(float* out, + int num_rows) { + finalize(out, num_rows); + } }; template @@ -125,19 +128,18 @@ struct tree_aggregator_t { int num_output_classes; __device__ __forceinline__ tree_aggregator_t(int num_output_classes_, - void* shared_workspace) - : votes(shared_workspace), num_output_classes(num_output_classes_) { - + void* shared_workspace) + : votes((vote_count_t*)shared_workspace), + num_output_classes(num_output_classes_) { for (int c = threadIdx.x; c < num_output_classes; c += FIL_TPB * NITEMS) #pragma unroll - for (int i = 0; i < NITEMS; ++i) - votes[c * NITEMS + i] = 0; + for (int i = 0; i < NITEMS; ++i) votes[c * NITEMS + i] = 0; 
//__syncthreads(); // happening outside } - __device__ __forceinline__ void accumulate(vec out) { + __device__ __forceinline__ void accumulate(vec out) { #pragma unroll for (int i = 0; i < NITEMS; ++i) - atomicAdd(votes + out[i] * NITEMS + i, 1); + atomicAdd(votes + out[i].idx * NITEMS + i, 1); } __device__ __forceinline__ void finalize(float* out, int num_rows) { __syncthreads(); @@ -146,13 +148,13 @@ struct tree_aggregator_t { if ((item < NITEMS) && (row < num_rows)) { #pragma unroll for (int c = 0; c < num_output_classes; ++c) - out[row * num_output_classes + c] = - votes[c * NITEMS + item]; + out[row * num_output_classes + c] = votes[c * NITEMS + item]; } } // using this when predicting a single class label, as opposed to sparse class vector // or class probabilities or regression - __device__ __forceinline__ void finalize_class_label(float* out, int num_rows) { + __device__ __forceinline__ void finalize_class_label(float* out, + int num_rows) { __syncthreads(); int item = threadIdx.x; int row = blockIdx.x * NITEMS + item; @@ -160,7 +162,7 @@ struct tree_aggregator_t { vote_count_t max_votes = 0; class_label_t best_class = 0; for (int c = 0; c < num_output_classes; ++c) - if(votes[c * NITEMS + item] > max_votes) { + if (votes[c * NITEMS + item] > max_votes) { max_votes = votes[c * NITEMS + item]; best_class = c; } @@ -193,14 +195,14 @@ __global__ void infer_k(storage_type forest, predict_params params) { // one block works on NITEMS rows and the whole forest for (int j = threadIdx.x; j < forest.num_trees(); j += blockDim.x) { acc.accumulate( - infer_one_tree(forest[j], sdata, params.num_cols)); + infer_one_tree(forest[j], sdata, params.num_cols)); } // compute most probable class. 
in cuML RF, output is class label, // hence, no-predicted class edge case doesn't apply - if ((leaf_payload_type == INT_CLASS_LABEL) && (!params.predict_proba)) + if (!params.predict_proba) acc.finalize_class_label(params.preds, params.num_rows); else - acc.finalize (params.preds, params.num_rows); + acc.finalize(params.preds, params.num_rows); } template { // initialize nodes nodes.resize(num_nodes); for (size_t i = 0; i < num_nodes; ++i) { - val_t w; - switch (ps.leaf_payload_type) { - case INT_CLASS_LABEL: - w.idx = (int)(weights_h[i] + 1.0f) % 2; // [0, 1] + fil::val_t w; + switch (ps.leaf_payload_type) { + case fil::leaf_value_desc_t::INT_CLASS_LABEL: + w.idx = (int)(weights_h[i] + 1.0f) % 2; // [0, 1] break; - case FLOAT_SCALAR: + case fil::leaf_value_desc_t::FLOAT_SCALAR: w.f = weights_h[i]; } fil::dense_node_init(&nodes[i], w, thresholds_h[i], fids_h[i], @@ -260,9 +260,19 @@ class BaseFilTest : public testing::TestWithParam { int fid = 0; bool def_left = false, is_leaf = false; for (;;) { - fil::dense_node_decode(&root[curr], &output, &threshold, &fid, &def_left, - &is_leaf, fil::leaf_value_t::FLOAT_SCALAR); - if (is_leaf) break; + fil::val_t w; + fil::dense_node_decode(&root[curr], &w, &threshold, &fid, &def_left, + &is_leaf); + if (is_leaf) { + switch (ps.leaf_payload_type) { + case fil::leaf_value_desc_t::INT_CLASS_LABEL: + output = w.idx; + break; + case fil::leaf_value_desc_t::FLOAT_SCALAR: + output = w.f; + } + break; + } float val = data[fid]; bool cond = isnan(val) ? !def_left : val >= threshold; curr = (curr << 1) + 1 + (cond ? 
1 : 0); @@ -315,7 +325,7 @@ class PredictSparseFilTest : public BaseFilTest { void dense2sparse_node(const fil::dense_node_t* dense_root, int i_dense, int i_sparse_root, int i_sparse) { float threshold; - val_t output; + fil::val_t output; int feature; bool def_left, is_leaf; dense_node_decode(&dense_root[i_dense], &output, &threshold, &feature, @@ -381,7 +391,7 @@ class TreeliteFilTest : public BaseFilTest { TL_CPP_CHECK(builder->CreateNode(key)); int feature; float threshold; - val_t output; + fil::val_t output; bool is_leaf, default_left; fil::dense_node_decode(&nodes[node], &output, &threshold, &feature, &default_left, &is_leaf); @@ -488,52 +498,69 @@ class TreeliteAutoFilTest : public TreeliteFilTest { // global_bias, algo, seed, tolerance std::vector predict_dense_inputs = { {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0, fil::algo_t::NAIVE, - 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR}, + 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_desc_t::FLOAT_SCALAR}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0, - fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR}, + fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, + fil::leaf_value_desc_t::FLOAT_SCALAR}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR}, + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, + fil::leaf_value_desc_t::FLOAT_SCALAR}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, - fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR}, + fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kEQ, + fil::leaf_value_desc_t::FLOAT_SCALAR}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, - fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR}, + fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, + fil::leaf_value_desc_t::FLOAT_SCALAR}, 
{20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR}, + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, + fil::leaf_value_desc_t::FLOAT_SCALAR}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::SIGMOID | fil::output_t::THRESHOLD), 0, 0, - fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR}, + fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kEQ, + fil::leaf_value_desc_t::FLOAT_SCALAR}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::SIGMOID | fil::output_t::THRESHOLD), 0, 0, - fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR}, + fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, + fil::leaf_value_desc_t::FLOAT_SCALAR}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::SIGMOID | fil::output_t::THRESHOLD), 0, 0, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR}, + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, + fil::leaf_value_desc_t::FLOAT_SCALAR}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::NAIVE, - 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR}, + 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_desc_t::FLOAT_SCALAR}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, - fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR}, + fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, + fil::leaf_value_desc_t::FLOAT_SCALAR}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR}, + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, + fil::leaf_value_desc_t::FLOAT_SCALAR}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::THRESHOLD), 0, 0, - fil::algo_t::NAIVE, 42, 2e-3f, 
tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR}, + fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kEQ, + fil::leaf_value_desc_t::FLOAT_SCALAR}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::THRESHOLD), 0, 0, - fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR}, + fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, + fil::leaf_value_desc_t::FLOAT_SCALAR}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::THRESHOLD), 0, 0, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR}, + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, + fil::leaf_value_desc_t::FLOAT_SCALAR}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0.5, - fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR}, + fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, + fil::leaf_value_desc_t::FLOAT_SCALAR}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0.5, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR}, + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, + fil::leaf_value_desc_t::FLOAT_SCALAR}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0.5, fil::algo_t::NAIVE, - 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR}, + 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_desc_t::FLOAT_SCALAR}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::THRESHOLD), 1.0, 0.5, - fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR}, + fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, + fil::leaf_value_desc_t::FLOAT_SCALAR}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, - fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR}, + fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator::kEQ, + 
fil::leaf_value_desc_t::FLOAT_SCALAR}, }; TEST_P(PredictDenseFilTest, Predict) { compare(); } @@ -545,29 +572,35 @@ INSTANTIATE_TEST_CASE_P(FilTests, PredictDenseFilTest, // global_bias, algo, seed, tolerance std::vector predict_sparse_inputs = { {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0, fil::algo_t::NAIVE, - 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR}, + 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_desc_t::FLOAT_SCALAR}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, - fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR}, + fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kEQ, + fil::leaf_value_desc_t::FLOAT_SCALAR}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::SIGMOID | fil::output_t::THRESHOLD), 0, 0, - fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR}, + fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kEQ, + fil::leaf_value_desc_t::FLOAT_SCALAR}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::NAIVE, - 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR}, + 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_desc_t::FLOAT_SCALAR}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::THRESHOLD), 0, 0.5, - fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR}, + fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kEQ, + fil::leaf_value_desc_t::FLOAT_SCALAR}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0.5, fil::algo_t::NAIVE, - 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR}, + 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_desc_t::FLOAT_SCALAR}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0.5, - fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR}, + fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kEQ, + fil::leaf_value_desc_t::FLOAT_SCALAR}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0.5, 
fil::algo_t::NAIVE, - 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR}, + 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_desc_t::FLOAT_SCALAR}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::THRESHOLD), 1.0, 0.5, - fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR}, + fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kEQ, + fil::leaf_value_desc_t::FLOAT_SCALAR}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::SIGMOID | fil::output_t::THRESHOLD), 0, 0, - fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR}, + fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator::kEQ, + fil::leaf_value_desc_t::FLOAT_SCALAR}, }; TEST_P(PredictSparseFilTest, Predict) { compare(); } @@ -579,72 +612,98 @@ INSTANTIATE_TEST_CASE_P(FilTests, PredictSparseFilTest, // global_bias, algo, seed, tolerance std::vector import_dense_inputs = { {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0, fil::algo_t::NAIVE, - 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::FLOAT_SCALAR}, + 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_desc_t::FLOAT_SCALAR}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, - fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kLE, fil::leaf_value_t::INT_CLASS_LABEL}, + fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kLE, + fil::leaf_value_desc_t::INT_CLASS_LABEL}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::SIGMOID | fil::output_t::THRESHOLD), 0, 0, - fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kGT, fil::leaf_value_t::FLOAT_SCALAR}, + fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kGT, + fil::leaf_value_desc_t::FLOAT_SCALAR}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::NAIVE, - 42, 2e-3f, tl::Operator::kGE, fil::leaf_value_t::INT_CLASS_LABEL}, + 42, 2e-3f, tl::Operator::kGE, fil::leaf_value_desc_t::INT_CLASS_LABEL}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::THRESHOLD), 0, 0, - 
fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::FLOAT_SCALAR}, + fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kLT, + fil::leaf_value_desc_t::FLOAT_SCALAR}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0, - fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kLE, fil::leaf_value_t::INT_CLASS_LABEL}, + fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kLE, + fil::leaf_value_desc_t::INT_CLASS_LABEL}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, - fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kGT, fil::leaf_value_t::INT_CLASS_LABEL}, + fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kGT, + fil::leaf_value_desc_t::INT_CLASS_LABEL}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::SIGMOID | fil::output_t::THRESHOLD), 0, 0, - fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kGE, fil::leaf_value_t::INT_CLASS_LABEL}, + fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kGE, + fil::leaf_value_desc_t::INT_CLASS_LABEL}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, - fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::FLOAT_SCALAR}, + fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kLT, + fil::leaf_value_desc_t::FLOAT_SCALAR}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::THRESHOLD), 0, 0, - fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kLE, fil::leaf_value_t::INT_CLASS_LABEL}, + fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kLE, + fil::leaf_value_desc_t::INT_CLASS_LABEL}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::FLOAT_SCALAR}, + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLT, + fil::leaf_value_desc_t::FLOAT_SCALAR}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::INT_CLASS_LABEL}, + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, 
tl::Operator::kLT, + fil::leaf_value_desc_t::INT_CLASS_LABEL}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLE, fil::leaf_value_t::INT_CLASS_LABEL}, + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLE, + fil::leaf_value_desc_t::INT_CLASS_LABEL}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLE, fil::leaf_value_t::FLOAT_SCALAR}, + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLE, + fil::leaf_value_desc_t::FLOAT_SCALAR}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kGT, fil::leaf_value_t::FLOAT_SCALAR}, + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kGT, + fil::leaf_value_desc_t::FLOAT_SCALAR}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kGT, fil::leaf_value_t::INT_CLASS_LABEL}, + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kGT, + fil::leaf_value_desc_t::INT_CLASS_LABEL}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kGE, fil::leaf_value_t::INT_CLASS_LABEL}, + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kGE, + fil::leaf_value_desc_t::INT_CLASS_LABEL}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kGE, fil::leaf_value_t::FLOAT_SCALAR}, + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kGE, + fil::leaf_value_desc_t::FLOAT_SCALAR}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::SIGMOID | fil::output_t::THRESHOLD), 0, 0, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::INT_CLASS_LABEL}, + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLT, + fil::leaf_value_desc_t::INT_CLASS_LABEL}, {20000, 50, 0.05, 8, 50, 0.05, 
fil::output_t(fil::output_t::SIGMOID | fil::output_t::THRESHOLD), 0, 0, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLE, fil::leaf_value_t::INT_CLASS_LABEL}, + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLE, + fil::leaf_value_desc_t::INT_CLASS_LABEL}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::INT_CLASS_LABEL}, + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLT, + fil::leaf_value_desc_t::INT_CLASS_LABEL}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLE, fil::leaf_value_t::FLOAT_SCALAR}, + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLE, + fil::leaf_value_desc_t::FLOAT_SCALAR}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::THRESHOLD), 0, 0, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kGT, fil::leaf_value_t::FLOAT_SCALAR}, + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kGT, + fil::leaf_value_desc_t::FLOAT_SCALAR}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::THRESHOLD), 0, 0, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kGE, fil::leaf_value_t::FLOAT_SCALAR}, + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kGE, + fil::leaf_value_desc_t::FLOAT_SCALAR}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0.5, - fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::INT_CLASS_LABEL}, + fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kLT, + fil::leaf_value_desc_t::INT_CLASS_LABEL}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0.5, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLE, fil::leaf_value_t::INT_CLASS_LABEL}, + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLE, + fil::leaf_value_desc_t::INT_CLASS_LABEL}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0.5, 
fil::algo_t::NAIVE, - 42, 2e-3f, tl::Operator::kGT, fil::leaf_value_t::INT_CLASS_LABEL}, + 42, 2e-3f, tl::Operator::kGT, fil::leaf_value_desc_t::INT_CLASS_LABEL}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::THRESHOLD), 1.0, 0.5, - fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kGE, fil::leaf_value_t::FLOAT_SCALAR}, + fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kGE, + fil::leaf_value_desc_t::FLOAT_SCALAR}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, - fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator::kLE, fil::leaf_value_t::INT_CLASS_LABEL}, + fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator::kLE, + fil::leaf_value_desc_t::INT_CLASS_LABEL}, }; TEST_P(TreeliteDenseFilTest, Import) { compare(); } @@ -656,28 +715,34 @@ INSTANTIATE_TEST_CASE_P(FilTests, TreeliteDenseFilTest, // global_bias, algo, seed, tolerance std::vector import_sparse_inputs = { {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0, fil::algo_t::NAIVE, - 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::INT_CLASS_LABEL}, + 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_desc_t::INT_CLASS_LABEL}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, - fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kLE, fil::leaf_value_t::FLOAT_SCALAR}, + fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kLE, + fil::leaf_value_desc_t::FLOAT_SCALAR}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::SIGMOID | fil::output_t::THRESHOLD), 0, 0, - fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kGT, fil::leaf_value_t::FLOAT_SCALAR}, + fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kGT, + fil::leaf_value_desc_t::FLOAT_SCALAR}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::NAIVE, - 42, 2e-3f, tl::Operator::kGE, fil::leaf_value_t::INT_CLASS_LABEL}, + 42, 2e-3f, tl::Operator::kGE, fil::leaf_value_desc_t::INT_CLASS_LABEL}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::THRESHOLD), 0, 0, - fil::algo_t::NAIVE, 
42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::INT_CLASS_LABEL}, + fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kLT, + fil::leaf_value_desc_t::INT_CLASS_LABEL}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0.5, fil::algo_t::NAIVE, - 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::INT_CLASS_LABEL}, + 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_desc_t::INT_CLASS_LABEL}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0.5, - fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kLE, fil::leaf_value_t::FLOAT_SCALAR}, + fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kLE, + fil::leaf_value_desc_t::FLOAT_SCALAR}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0.5, fil::algo_t::NAIVE, - 42, 2e-3f, tl::Operator::kGT, fil::leaf_value_t::INT_CLASS_LABEL}, + 42, 2e-3f, tl::Operator::kGT, fil::leaf_value_desc_t::INT_CLASS_LABEL}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::THRESHOLD), 1.0, 0.5, - fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kGE, fil::leaf_value_t::FLOAT_SCALAR}, + fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kGE, + fil::leaf_value_desc_t::FLOAT_SCALAR}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0, - fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::INT_CLASS_LABEL}, + fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator::kLT, + fil::leaf_value_desc_t::INT_CLASS_LABEL}, }; TEST_P(TreeliteSparseFilTest, Import) { compare(); } @@ -689,13 +754,17 @@ INSTANTIATE_TEST_CASE_P(FilTests, TreeliteSparseFilTest, // global_bias, algo, seed, tolerance std::vector import_auto_inputs = { {20000, 50, 0.05, 10, 50, 0.05, fil::output_t::RAW, 0, 0, - fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::FLOAT_SCALAR}, + fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator::kLT, + fil::leaf_value_desc_t::FLOAT_SCALAR}, {20000, 50, 0.05, 15, 50, 0.05, fil::output_t::RAW, 0, 0, - fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::INT_CLASS_LABEL}, + 
fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator::kLT, + fil::leaf_value_desc_t::INT_CLASS_LABEL}, {20000, 50, 0.05, 19, 50, 0.05, fil::output_t::RAW, 0, 0, - fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::INT_CLASS_LABEL}, + fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator::kLT, + fil::leaf_value_desc_t::INT_CLASS_LABEL}, {20000, 50, 0.05, 19, 50, 0.05, fil::output_t::RAW, 0, 0, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::FLOAT_SCALAR}, + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLT, + fil::leaf_value_desc_t::FLOAT_SCALAR}, }; TEST_P(TreeliteAutoFilTest, Import) { compare(); } From 19ff8b6f180912248b0445069c6bb39189500b64 Mon Sep 17 00:00:00 2001 From: Levs Dolgovs Date: Tue, 3 Mar 2020 15:43:34 -0800 Subject: [PATCH 038/330] fixed treelite flags like random_forest_flag and num_output_groups --- cpp/include/cuml/fil/fil.h | 4 +- cpp/src/fil/fil.cu | 60 ++++++---- cpp/src/fil/infer.cu | 33 +++--- cpp/test/sg/fil_test.cu | 219 +++++++++++++++++++++---------------- 4 files changed, 181 insertions(+), 135 deletions(-) diff --git a/cpp/include/cuml/fil/fil.h b/cpp/include/cuml/fil/fil.h index 3b9fe42396..63f2a0519f 100644 --- a/cpp/include/cuml/fil/fil.h +++ b/cpp/include/cuml/fil/fil.h @@ -114,9 +114,9 @@ struct sparse_node_t : dense_node_t, sparse_node_extra_data { /** leaf_value_desc_t describes what the leaves in a FIL forest store (predict) */ enum leaf_value_desc_t { /** storing a clas probability or regression summand */ - FLOAT_SCALAR, + FLOAT_SCALAR = 0, /** storing a class label */ - INT_CLASS_LABEL + INT_CLASS_LABEL = 1 // to be extended }; diff --git a/cpp/src/fil/fil.cu b/cpp/src/fil/fil.cu index 06e6e8b9fd..7dac9b87c5 100644 --- a/cpp/src/fil/fil.cu +++ b/cpp/src/fil/fil.cu @@ -117,6 +117,7 @@ struct forest { threshold_ = params->threshold; global_bias_ = params->global_bias; leaf_payload_type_ = params->leaf_payload_type; + printf("@forest init_common leaf_payload_type_ == 
%d\n", leaf_payload_type_); num_output_classes_ = params->num_classes; init_max_shm(); } @@ -137,6 +138,7 @@ struct forest { ((num_output_classes_ > 2) || (leaf_payload_type_ == INT_CLASS_LABEL)) ? num_output_classes_ : (predict_proba ? 2 : 1); + printf("forest::num_output_classes_ = %d, predict_params.num_output_classes = %d\n", num_output_classes_, params.num_output_classes); params.leaf_payload_type = leaf_payload_type_; params.predict_proba = predict_proba; @@ -172,7 +174,8 @@ struct forest { output_t output_ = output_t::RAW; float threshold_ = 0.5; float global_bias_ = 0; - leaf_value_desc_t leaf_payload_type_ = FLOAT_SCALAR; + // init to invalid + leaf_value_desc_t leaf_payload_type_ = (leaf_value_desc_t)-1; int num_output_classes_ = INT_MAX; }; @@ -398,10 +401,12 @@ int find_class_label_from_one_hot(tl::tl_float* vector, int len) { template void tl2fil_leaf_payload(fil_node_t* fil_node, const tl::Tree::Node& tl_node, - leaf_value_desc_t leaf_payload_type) { + const forest_params_t& forest_params) { auto vec = tl_node.leaf_vector(); - switch (leaf_payload_type) { + switch (forest_params.leaf_payload_type) { case INT_CLASS_LABEL: + ASSERT(vec.size() == forest_params.num_classes, + "inconsistent number of classes in treelite leaves"); fil_node->val.idx = find_class_label_from_one_hot(&vec[0], vec.size()); ASSERT(fil_node->val.idx != (unsigned)-1, "a non-empty non-one-hot leaf vector"); @@ -418,11 +423,11 @@ void tl2fil_leaf_payload(fil_node_t* fil_node, const tl::Tree::Node& tl_node, void node2fil_dense(std::vector* pnodes, int root, int cur, const tl::Tree& tree, const tl::Tree::Node& node, - const leaf_value_desc_t leaf_payload_type) { + const forest_params_t& forest_params) { if (node.is_leaf()) { dense_node_init(&(*pnodes)[root + cur], val_t{.f = NAN}, NAN, 0, false, true); - tl2fil_leaf_payload(&(*pnodes)[root + cur], node, leaf_payload_type); + tl2fil_leaf_payload(&(*pnodes)[root + cur], node, forest_params); return; } @@ -437,18 +442,18 @@ void 
node2fil_dense(std::vector* pnodes, int root, int cur, node.split_index(), default_left, false); int left = 2 * cur + 1; node2fil_dense(pnodes, root, left, tree, tl_node_at(tree, tl_left), - leaf_payload_type); + forest_params); node2fil_dense(pnodes, root, left + 1, tree, tl_node_at(tree, tl_right), - leaf_payload_type); + forest_params); } void node2fil_sparse(std::vector* pnodes, int root, int cur, const tl::Tree& tree, const tl::Tree::Node& node, - const leaf_value_desc_t leaf_payload_type) { + const forest_params_t& forest_params) { if (node.is_leaf()) { sparse_node_init(&(*pnodes)[root + cur], val_t{.f = NAN}, NAN, 0, false, true, 0); - tl2fil_leaf_payload(&(*pnodes)[root + cur], node, leaf_payload_type); + tl2fil_leaf_payload(&(*pnodes)[root + cur], node, forest_params); return; } @@ -473,24 +478,24 @@ void node2fil_sparse(std::vector* pnodes, int root, int cur, // init child nodes node2fil_sparse(pnodes, root, left, tree, tl_node_at(tree, tl_left), - leaf_payload_type); + forest_params); node2fil_sparse(pnodes, root, left + 1, tree, tl_node_at(tree, tl_right), - leaf_payload_type); + forest_params); } void tree2fil_dense(std::vector* pnodes, int root, const tl::Tree& tree, - const leaf_value_desc_t leaf_payload_type) { + const forest_params_t& forest_params) { node2fil_dense(pnodes, root, 0, tree, tl_node_at(tree, tree_root(tree)), - leaf_payload_type); + forest_params); } int tree2fil_sparse(std::vector* pnodes, const tl::Tree& tree, - const leaf_value_desc_t leaf_payload_type) { + const forest_params_t& forest_params) { int root = pnodes->size(); pnodes->push_back(sparse_node_t()); node2fil_sparse(pnodes, root, 0, tree, tl_node_at(tree, tree_root(tree)), - leaf_payload_type); + forest_params); return root; } @@ -504,19 +509,27 @@ void tl2fil_common(forest_params_t* params, const tl::Model& model, // assuming either all leaves use the .leaf_vector() or all leaves use .leaf_value() auto tree = model.trees[0]; - auto vec = tl_node_at(tree, 
tree_root(tree)).leaf_vector(); + int node_key; + for(node_key = tree_root(tree); + !tl_node_at(tree, node_key).is_leaf(); + node_key = tl_node_at(tree, node_key).cleft()); + auto vec = tl_node_at(tree, node_key).leaf_vector(); if (vec.size()) { - if (find_class_label_from_one_hot(&vec[0], vec.size()) != -1) + if (find_class_label_from_one_hot(&vec[0], vec.size()) != -1) { + params->num_classes = vec.size(); + ASSERT(vec.size() == model.num_output_group, "treelite model inconsistent"); params->leaf_payload_type = INT_CLASS_LABEL; - else + printf("detected %lu-class classification model \n", vec.size()); + } else ASSERT(false, "unexpected: non-empty non-one-hot leaf vector"); - } else + } else { params->leaf_payload_type = FLOAT_SCALAR; + params->num_classes = tl_params->output_class ? 2 : 1; + } + printf("@tl2fil_common leaf_payload_type == %d\n", params->leaf_payload_type); // fill in forest-dependent params params->num_cols = model.num_feature; - ASSERT(model.num_output_group == 1, - "multi-class classification not supported"); const tl::ModelParam& param = model.param; ASSERT(param.sigmoid_alpha == 1.0f, "sigmoid_alpha not supported"); params->global_bias = param.global_bias; @@ -549,7 +562,7 @@ void tl2fil_dense(std::vector* pnodes, forest_params_t* params, pnodes->resize(num_nodes, dense_node_t{0, 0}); for (int i = 0; i < model.trees.size(); ++i) { tree2fil_dense(pnodes, i * tree_num_nodes(params->depth), model.trees[i], - params->leaf_payload_type); + *params); } } @@ -562,8 +575,7 @@ void tl2fil_sparse(std::vector* ptrees, std::vector* pnodes, // convert the nodes for (int i = 0; i < model.trees.size(); ++i) { - int root = - tree2fil_sparse(pnodes, model.trees[i], params->leaf_payload_type); + int root = tree2fil_sparse(pnodes, model.trees[i], *params); ptrees->push_back(root); } params->num_nodes = pnodes->size(); diff --git a/cpp/src/fil/infer.cu b/cpp/src/fil/infer.cu index 7d203e8dc1..0a98ec6d78 100644 --- a/cpp/src/fil/infer.cu +++ 
b/cpp/src/fil/infer.cu @@ -41,6 +41,8 @@ struct vec { } }; +#define __forceinline__ + template __device__ __forceinline__ vec infer_one_tree( tree_type tree, float* sdata, int cols) { @@ -212,25 +214,28 @@ void infer_k_launcher(storage_type forest, predict_params params, const int MAX_BATCH_ITEMS = 4; params.max_items = params.algo == algo_t::BATCH_TREE_REORG ? MAX_BATCH_ITEMS : 1; - int num_items = params.max_shm / (sizeof(float) * params.num_cols); - if (num_items == 0) { - int max_cols = params.max_shm / sizeof(float); - ASSERT(false, "p.num_cols == %d: too many features, only %d allowed", - params.num_cols, max_cols); - } - num_items = std::min(num_items, params.max_items); - int num_blocks = ceildiv(int(params.num_rows), num_items); - int shm_sz; + + int shared_mem_per_item = sizeof(float) * params.num_cols; switch (leaf_payload_type) { case INT_CLASS_LABEL: - shm_sz = num_items * sizeof(int) * params.num_output_classes; + // class vote histogram, while inferring trees + shared_mem_per_item += sizeof(int) * params.num_output_classes; break; case FLOAT_SCALAR: - shm_sz = num_items * sizeof(float) * params.num_cols; + // CUB workspace should fit itself, and we don't need + // the row by the time CUB is used break; - default: - ASSERT(false, "internal error: unknown leaf_payload_type"); } + int num_items = params.max_shm / shared_mem_per_item; + if (num_items == 0) { + int max_cols = params.max_shm / sizeof(float); + ASSERT(false, "p.num_cols == %d: too many features, only %d allowed%s", + params.num_cols, max_cols, leaf_payload_type == INT_CLASS_LABEL ? 
+ "(accounting for shared class vote histogram)" : ""); + } + num_items = std::min(num_items, params.max_items); + int num_blocks = ceildiv(int(params.num_rows), num_items); + int shm_sz = num_items * shared_mem_per_item; switch (num_items) { case 1: infer_k<1, leaf_payload_type, output_type> @@ -256,6 +261,7 @@ void infer_k_launcher(storage_type forest, predict_params params, template void infer(storage_type forest, predict_params params, cudaStream_t stream) { + printf("infer::num_output_classes = %u\n", params.num_output_classes); switch (params.leaf_payload_type) { case FLOAT_SCALAR: ASSERT(params.num_output_classes <= 2, @@ -279,3 +285,4 @@ template void infer(sparse_storage forest, } // namespace fil } // namespace ML +#undef __forceinline__ diff --git a/cpp/test/sg/fil_test.cu b/cpp/test/sg/fil_test.cu index cd75ab420e..2ec478e0a1 100644 --- a/cpp/test/sg/fil_test.cu +++ b/cpp/test/sg/fil_test.cu @@ -58,6 +58,10 @@ struct FilTestParams { // treelite parameters, only used for treelite tests tl::Operator op; fil::leaf_value_desc_t leaf_payload_type; + // num_classes must be 1 when FLOAT_SCALAR == leaf_payload_type + // num_classes must be >1 when INT_CLASS_LABEL == leaf_payload_type + // it's used in treelite ModelBuilder initialization + int num_classes; }; std::ostream& operator<<(std::ostream& os, const FilTestParams& ps) { @@ -67,7 +71,8 @@ std::ostream& operator<<(std::ostream& os, const FilTestParams& ps) { << ", output = " << ps.output << ", threshold = " << ps.threshold << ", algo = " << ps.algo << ", seed = " << ps.seed << ", tolerance = " << ps.tolerance << ", op = " << tl::OpName(ps.op) - << ", leaf_payload_type = " << ps.leaf_payload_type; + << ", leaf_payload_type = " << ps.leaf_payload_type + << ", num_classes = " << ps.num_classes; return os; } @@ -371,6 +376,9 @@ class PredictSparseFilTest : public BaseFilTest { fil_params.threshold = ps.threshold; fil_params.global_bias = ps.global_bias; fil_params.leaf_payload_type = ps.leaf_payload_type; + 
fil_params.num_classes = + (ps.num_classes == 1 && ps.output & fil::output_t::THRESHOLD) ? 2 + : ps.num_classes; dense2sparse(); fil_params.num_nodes = sparse_nodes.size(); fil::init_sparse(handle, pforest, trees.data(), sparse_nodes.data(), @@ -396,8 +404,16 @@ class TreeliteFilTest : public BaseFilTest { fil::dense_node_decode(&nodes[node], &output, &threshold, &feature, &default_left, &is_leaf); if (is_leaf) { - // default is fil::FLOAT_SCALAR - TL_CPP_CHECK(builder->SetLeafNode(key, output.f)); + switch (ps.leaf_payload_type) { + case fil::leaf_value_desc_t::FLOAT_SCALAR: + // default is fil::FLOAT_SCALAR + TL_CPP_CHECK(builder->SetLeafNode(key, output.f)); + break; + case fil::leaf_value_desc_t::INT_CLASS_LABEL: + std::vector vec(ps.num_classes); + vec[output.idx] = 1.; + TL_CPP_CHECK(builder->SetLeafVectorNode(key, vec)); + } } else { int left = root + 2 * (node - root) + 1; int right = root + 2 * (node - root) + 2; @@ -431,9 +447,14 @@ class TreeliteFilTest : public BaseFilTest { void init_forest_impl(fil::forest_t* pforest, fil::storage_type_t storage_type) { - bool random_forest_flag = (ps.output & fil::output_t::AVG) != 0; + bool random_forest_flag = + (ps.output & fil::output_t::AVG) && + // TODO: why does ModelBuilder(num_cols, 1, true) break on FLOAT_SCALAR? + (ps.leaf_payload_type == fil::leaf_value_desc_t::INT_CLASS_LABEL); + printf("%s && %s == %s\n", (ps.output & fil::output_t::AVG) ? "at" : "af", (ps.leaf_payload_type == fil::leaf_value_desc_t::INT_CLASS_LABEL) ? "pt" : "pf", random_forest_flag ? "ft" : "ff"); std::unique_ptr model_builder( - new tlf::ModelBuilder(ps.num_cols, 1, random_forest_flag)); + new tlf::ModelBuilder(ps.num_cols, ps.num_classes, random_forest_flag)); + printf("ModelBuilder(num_cols, num_classes = %d, random_forest_flag = %s)\n", ps.num_classes, random_forest_flag ? 
"true" : "false"); // prediction transform if ((ps.output & fil::output_t::SIGMOID) != 0) { @@ -498,69 +519,69 @@ class TreeliteAutoFilTest : public TreeliteFilTest { // global_bias, algo, seed, tolerance std::vector predict_dense_inputs = { {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0, fil::algo_t::NAIVE, - 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_desc_t::FLOAT_SCALAR}, + 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0, fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_desc_t::FLOAT_SCALAR}, + fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0, fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_desc_t::FLOAT_SCALAR}, + fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_desc_t::FLOAT_SCALAR}, + fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_desc_t::FLOAT_SCALAR}, + fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_desc_t::FLOAT_SCALAR}, + fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::SIGMOID | fil::output_t::THRESHOLD), 0, 0, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_desc_t::FLOAT_SCALAR}, + fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::SIGMOID | fil::output_t::THRESHOLD), 0, 0, fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_desc_t::FLOAT_SCALAR}, + fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, {20000, 50, 0.05, 8, 50, 0.05, 
fil::output_t(fil::output_t::SIGMOID | fil::output_t::THRESHOLD), 0, 0, fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_desc_t::FLOAT_SCALAR}, + fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::NAIVE, - 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_desc_t::FLOAT_SCALAR}, + 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_desc_t::FLOAT_SCALAR}, + fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_desc_t::FLOAT_SCALAR}, + fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::THRESHOLD), 0, 0, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_desc_t::FLOAT_SCALAR}, + fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::THRESHOLD), 0, 0, fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_desc_t::FLOAT_SCALAR}, + fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::THRESHOLD), 0, 0, fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_desc_t::FLOAT_SCALAR}, + fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0.5, fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_desc_t::FLOAT_SCALAR}, + fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0.5, fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_desc_t::FLOAT_SCALAR}, + fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, {20000, 50, 0.05, 8, 50, 0.05, 
fil::output_t::AVG, 0, 0.5, fil::algo_t::NAIVE, - 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_desc_t::FLOAT_SCALAR}, + 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::THRESHOLD), 1.0, 0.5, fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_desc_t::FLOAT_SCALAR}, + fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_desc_t::FLOAT_SCALAR}, + fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, }; TEST_P(PredictDenseFilTest, Predict) { compare(); } @@ -572,35 +593,35 @@ INSTANTIATE_TEST_CASE_P(FilTests, PredictDenseFilTest, // global_bias, algo, seed, tolerance std::vector predict_sparse_inputs = { {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0, fil::algo_t::NAIVE, - 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_desc_t::FLOAT_SCALAR}, + 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_desc_t::FLOAT_SCALAR}, + fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::SIGMOID | fil::output_t::THRESHOLD), 0, 0, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_desc_t::FLOAT_SCALAR}, + fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::NAIVE, - 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_desc_t::FLOAT_SCALAR}, + 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::THRESHOLD), 0, 0.5, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_desc_t::FLOAT_SCALAR}, + fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, {20000, 50, 0.05, 8, 50, 0.05, 
fil::output_t::RAW, 0, 0.5, fil::algo_t::NAIVE, - 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_desc_t::FLOAT_SCALAR}, + 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0.5, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_desc_t::FLOAT_SCALAR}, + fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0.5, fil::algo_t::NAIVE, - 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_desc_t::FLOAT_SCALAR}, + 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::THRESHOLD), 1.0, 0.5, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_desc_t::FLOAT_SCALAR}, + fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::SIGMOID | fil::output_t::THRESHOLD), 0, 0, fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_desc_t::FLOAT_SCALAR}, + fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, }; TEST_P(PredictSparseFilTest, Predict) { compare(); } @@ -612,98 +633,104 @@ INSTANTIATE_TEST_CASE_P(FilTests, PredictSparseFilTest, // global_bias, algo, seed, tolerance std::vector import_dense_inputs = { {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0, fil::algo_t::NAIVE, - 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_desc_t::FLOAT_SCALAR}, - {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, + 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, + {20000, 50, 0.05, 8, 50, 0.05, + fil::output_t(fil::output_t::SIGMOID | fil::output_t::AVG), 0, 0, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kLE, - fil::leaf_value_desc_t::INT_CLASS_LABEL}, + fil::leaf_value_desc_t::INT_CLASS_LABEL, 4}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::SIGMOID | fil::output_t::THRESHOLD), 0, 0, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kGT, - 
fil::leaf_value_desc_t::FLOAT_SCALAR}, + fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::NAIVE, - 42, 2e-3f, tl::Operator::kGE, fil::leaf_value_desc_t::INT_CLASS_LABEL}, + 42, 2e-3f, tl::Operator::kGE, fil::leaf_value_desc_t::INT_CLASS_LABEL, 2}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::THRESHOLD), 0, 0, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kLT, - fil::leaf_value_desc_t::FLOAT_SCALAR}, - {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0, + fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, + {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kLE, - fil::leaf_value_desc_t::INT_CLASS_LABEL}, - {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, + fil::leaf_value_desc_t::INT_CLASS_LABEL, 2}, + {20000, 50, 0.05, 8, 50, 0.05, + fil::output_t(fil::output_t::SIGMOID | fil::output_t::AVG), 0, 0, fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kGT, - fil::leaf_value_desc_t::INT_CLASS_LABEL}, + fil::leaf_value_desc_t::INT_CLASS_LABEL, 2}, {20000, 50, 0.05, 8, 50, 0.05, - fil::output_t(fil::output_t::SIGMOID | fil::output_t::THRESHOLD), 0, 0, + fil::output_t(fil::output_t::SIGMOID | fil::output_t::THRESHOLD | fil::output_t::AVG), 0, 0, fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kGE, - fil::leaf_value_desc_t::INT_CLASS_LABEL}, + fil::leaf_value_desc_t::INT_CLASS_LABEL, 2}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kLT, - fil::leaf_value_desc_t::FLOAT_SCALAR}, + fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::THRESHOLD), 0, 0, fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kLE, - fil::leaf_value_desc_t::INT_CLASS_LABEL}, + fil::leaf_value_desc_t::INT_CLASS_LABEL, 3}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0, fil::algo_t::BATCH_TREE_REORG, 42, 
2e-3f, tl::Operator::kLT, - fil::leaf_value_desc_t::FLOAT_SCALAR}, - {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, + fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, + {20000, 50, 0.05, 8, 50, 0.05, + fil::output_t(fil::output_t::SIGMOID | fil::output_t::AVG), 0, 0, fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLT, - fil::leaf_value_desc_t::INT_CLASS_LABEL}, - {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0, + fil::leaf_value_desc_t::INT_CLASS_LABEL, 2}, + {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLE, - fil::leaf_value_desc_t::INT_CLASS_LABEL}, + fil::leaf_value_desc_t::INT_CLASS_LABEL, 2}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLE, - fil::leaf_value_desc_t::FLOAT_SCALAR}, + fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0, fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kGT, - fil::leaf_value_desc_t::FLOAT_SCALAR}, - {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, + fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, + {20000, 50, 0.05, 8, 50, 0.05, + fil::output_t(fil::output_t::SIGMOID | fil::output_t::AVG), 0, 0, fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kGT, - fil::leaf_value_desc_t::INT_CLASS_LABEL}, - {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0, + fil::leaf_value_desc_t::INT_CLASS_LABEL, 2}, + {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kGE, - fil::leaf_value_desc_t::INT_CLASS_LABEL}, + fil::leaf_value_desc_t::INT_CLASS_LABEL, 2}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kGE, - fil::leaf_value_desc_t::FLOAT_SCALAR}, + fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, {20000, 50, 0.05, 8, 50, 0.05, - fil::output_t(fil::output_t::SIGMOID | fil::output_t::THRESHOLD), 
0, 0, + fil::output_t(fil::output_t::SIGMOID | fil::output_t::THRESHOLD | fil::output_t::AVG), 0, 0, fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLT, - fil::leaf_value_desc_t::INT_CLASS_LABEL}, + fil::leaf_value_desc_t::INT_CLASS_LABEL, 2}, {20000, 50, 0.05, 8, 50, 0.05, - fil::output_t(fil::output_t::SIGMOID | fil::output_t::THRESHOLD), 0, 0, + fil::output_t(fil::output_t::SIGMOID | fil::output_t::THRESHOLD | fil::output_t::AVG), 0, 0, fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLE, - fil::leaf_value_desc_t::INT_CLASS_LABEL}, + fil::leaf_value_desc_t::INT_CLASS_LABEL, 2}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLT, - fil::leaf_value_desc_t::INT_CLASS_LABEL}, + fil::leaf_value_desc_t::INT_CLASS_LABEL, 2}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLE, - fil::leaf_value_desc_t::FLOAT_SCALAR}, + fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::THRESHOLD), 0, 0, fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kGT, - fil::leaf_value_desc_t::FLOAT_SCALAR}, + fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::THRESHOLD), 0, 0, fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kGE, - fil::leaf_value_desc_t::FLOAT_SCALAR}, - {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0.5, + fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, + {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0.5, fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kLT, - fil::leaf_value_desc_t::INT_CLASS_LABEL}, - {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0.5, + fil::leaf_value_desc_t::INT_CLASS_LABEL, 2}, + {20000, 50, 0.05, 8, 50, 0.05, + fil::output_t(fil::output_t::SIGMOID | fil::output_t::AVG), 0, 0.5, fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, 
tl::Operator::kLE, - fil::leaf_value_desc_t::INT_CLASS_LABEL}, + fil::leaf_value_desc_t::INT_CLASS_LABEL, 2}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0.5, fil::algo_t::NAIVE, - 42, 2e-3f, tl::Operator::kGT, fil::leaf_value_desc_t::INT_CLASS_LABEL}, + 42, 2e-3f, tl::Operator::kGT, fil::leaf_value_desc_t::INT_CLASS_LABEL, 3}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::THRESHOLD), 1.0, 0.5, fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kGE, - fil::leaf_value_desc_t::FLOAT_SCALAR}, - {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, + fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, + {20000, 50, 0.05, 8, 50, 0.05, + fil::output_t(fil::output_t::SIGMOID | fil::output_t::AVG), 0, 0, fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator::kLE, - fil::leaf_value_desc_t::INT_CLASS_LABEL}, + fil::leaf_value_desc_t::INT_CLASS_LABEL, 2}, }; TEST_P(TreeliteDenseFilTest, Import) { compare(); } @@ -714,35 +741,35 @@ INSTANTIATE_TEST_CASE_P(FilTests, TreeliteDenseFilTest, // rows, cols, nan_prob, depth, num_trees, leaf_prob, output, threshold, // global_bias, algo, seed, tolerance std::vector import_sparse_inputs = { - {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0, fil::algo_t::NAIVE, - 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_desc_t::INT_CLASS_LABEL}, + {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::NAIVE, + 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_desc_t::INT_CLASS_LABEL, 2}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kLE, - fil::leaf_value_desc_t::FLOAT_SCALAR}, + fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::SIGMOID | fil::output_t::THRESHOLD), 0, 0, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kGT, - fil::leaf_value_desc_t::FLOAT_SCALAR}, + fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::NAIVE, - 42, 
2e-3f, tl::Operator::kGE, fil::leaf_value_desc_t::INT_CLASS_LABEL}, + 42, 2e-3f, tl::Operator::kGE, fil::leaf_value_desc_t::INT_CLASS_LABEL, 2}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::THRESHOLD), 0, 0, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kLT, - fil::leaf_value_desc_t::INT_CLASS_LABEL}, - {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0.5, fil::algo_t::NAIVE, - 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_desc_t::INT_CLASS_LABEL}, + fil::leaf_value_desc_t::INT_CLASS_LABEL, 2}, + {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0.5, fil::algo_t::NAIVE, + 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_desc_t::INT_CLASS_LABEL, 2}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0.5, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kLE, - fil::leaf_value_desc_t::FLOAT_SCALAR}, + fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0.5, fil::algo_t::NAIVE, - 42, 2e-3f, tl::Operator::kGT, fil::leaf_value_desc_t::INT_CLASS_LABEL}, + 42, 2e-3f, tl::Operator::kGT, fil::leaf_value_desc_t::INT_CLASS_LABEL, 3}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::THRESHOLD), 1.0, 0.5, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kGE, - fil::leaf_value_desc_t::FLOAT_SCALAR}, - {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0, + fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, + {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator::kLT, - fil::leaf_value_desc_t::INT_CLASS_LABEL}, + fil::leaf_value_desc_t::INT_CLASS_LABEL, 2}, }; TEST_P(TreeliteSparseFilTest, Import) { compare(); } @@ -755,16 +782,16 @@ INSTANTIATE_TEST_CASE_P(FilTests, TreeliteSparseFilTest, std::vector import_auto_inputs = { {20000, 50, 0.05, 10, 50, 0.05, fil::output_t::RAW, 0, 0, fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator::kLT, - fil::leaf_value_desc_t::FLOAT_SCALAR}, - {20000, 50, 0.05, 15, 50, 0.05, 
fil::output_t::RAW, 0, 0, + fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, + {20000, 50, 0.05, 15, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator::kLT, - fil::leaf_value_desc_t::INT_CLASS_LABEL}, - {20000, 50, 0.05, 19, 50, 0.05, fil::output_t::RAW, 0, 0, + fil::leaf_value_desc_t::INT_CLASS_LABEL, 2}, + {20000, 50, 0.05, 19, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator::kLT, - fil::leaf_value_desc_t::INT_CLASS_LABEL}, + fil::leaf_value_desc_t::INT_CLASS_LABEL, 2}, {20000, 50, 0.05, 19, 50, 0.05, fil::output_t::RAW, 0, 0, fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLT, - fil::leaf_value_desc_t::FLOAT_SCALAR}, + fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, }; TEST_P(TreeliteAutoFilTest, Import) { compare(); } From 0416633fb795fca5091da8aef322030ceef89bf8 Mon Sep 17 00:00:00 2001 From: Levs Dolgovs Date: Thu, 5 Mar 2020 21:27:13 -0800 Subject: [PATCH 039/330] fixed some parameter logic bugs, initialization bug. 2/3 of the tests pass. still debug-rigged. 
--- cpp/include/cuml/fil/fil.h | 22 ++- cpp/src/fil/common.cuh | 9 +- cpp/src/fil/fil.cu | 85 +++++----- cpp/src/fil/infer.cu | 109 ++++++------ cpp/test/sg/fil_test.cu | 334 ++++++++++++++++++++++--------------- 5 files changed, 322 insertions(+), 237 deletions(-) diff --git a/cpp/include/cuml/fil/fil.h b/cpp/include/cuml/fil/fil.h index 63f2a0519f..7aeb0293fe 100644 --- a/cpp/include/cuml/fil/fil.h +++ b/cpp/include/cuml/fil/fil.h @@ -95,7 +95,7 @@ union val_t { /** dense_node_t is a node in a densely-stored forest */ struct dense_node_t { - union val_t val; + val_t val; int bits; }; @@ -111,8 +111,8 @@ struct sparse_node_t : dense_node_t, sparse_node_extra_data { : dense_node_t(dn), sparse_node_extra_data(ed) {} }; -/** leaf_value_desc_t describes what the leaves in a FIL forest store (predict) */ -enum leaf_value_desc_t { +/** leaf_value_t describes what the leaves in a FIL forest store (predict) */ +enum leaf_value_t { /** storing a clas probability or regression summand */ FLOAT_SCALAR = 0, /** storing a class label */ @@ -120,6 +120,14 @@ enum leaf_value_desc_t { // to be extended }; +template +struct leaf_output_t {}; +template<> +struct leaf_output_t { typedef float T;}; +template<> +struct leaf_output_t { typedef unsigned T;}; + + /** dense_node_init initializes node from paramters */ void dense_node_init(dense_node_t* n, val_t output, float thresh, int fid, bool def_left, bool is_leaf); @@ -153,7 +161,7 @@ struct forest_params_t { // num_cols is the number of columns in the data int num_cols; // leaf_payload_type determines what the leaves store (predict) - leaf_value_desc_t leaf_payload_type; + leaf_value_t leaf_payload_type; // algo is the inference algorithm; // sparse forests do not distinguish between NAIVE and TREE_REORG algo_t algo; @@ -179,8 +187,10 @@ struct treelite_params_t { // output_class indicates whether thresholding will be applied // to the model output bool output_class; - // threshold is used for thresholding if output_class == true, 
- // and is ignored otherwise + // threshold may be used for thresholding if output_class == true, + // and is ignored otherwise. threshold is ignored if leaves store + // vectorized class labels. in that case, a class with most votes + // is returned regardless of the absolute vote count float threshold; // storage_type indicates whether the forest should be imported as dense or sparse storage_type_t storage_type; diff --git a/cpp/src/fil/common.cuh b/cpp/src/fil/common.cuh index f947fd5d5d..e20087f0c6 100644 --- a/cpp/src/fil/common.cuh +++ b/cpp/src/fil/common.cuh @@ -40,14 +40,14 @@ __host__ __device__ __forceinline__ int forest_num_nodes(int num_trees, } // FIL_TPB is the number of threads per block to use with FIL kernels -const int FIL_TPB = 256; +const unsigned long FIL_TPB = 256; /** base_node contains common implementation details for dense and sparse nodes */ struct base_node : dense_node_t { static const int FID_MASK = (1 << 30) - 1; static const int DEF_LEFT_MASK = 1 << 30; static const int IS_LEAF_MASK = 1 << 31; - __host__ __device__ val_t output() const { return val; } + template __host__ __device__ o_t output() const { return val; } __host__ __device__ float thresh() const { return val.f; } __host__ __device__ int fid() const { return bits & FID_MASK; } __host__ __device__ bool def_left() const { return bits & DEF_LEFT_MASK; } @@ -64,6 +64,9 @@ struct base_node : dense_node_t { } }; +template<> __host__ __device__ __forceinline__ float base_node::output() const { return val.f; } +template<> __host__ __device__ __forceinline__ unsigned base_node::output() const { return val.idx; } + /** dense_node is a single node of a dense forest */ struct alignas(8) dense_node : base_node { dense_node() = default; @@ -148,7 +151,7 @@ struct predict_params { int max_items; // only set and used by infer() int num_output_classes; // TODO doc - leaf_value_desc_t leaf_payload_type; + leaf_value_t leaf_payload_type; bool predict_proba; // Data parameters. 
diff --git a/cpp/src/fil/fil.cu b/cpp/src/fil/fil.cu index 7dac9b87c5..62b3b6334a 100644 --- a/cpp/src/fil/fil.cu +++ b/cpp/src/fil/fil.cu @@ -44,7 +44,7 @@ void dense_node_init(dense_node_t* n, val_t output, float thresh, int fid, void dense_node_decode(const dense_node_t* n, union val_t* output, float* thresh, int* fid, bool* def_left, bool* is_leaf) { dense_node dn(*n); - *output = dn.output(); + *output = dn.output(); *thresh = dn.thresh(); *fid = dn.fid(); *def_left = dn.def_left(); @@ -79,8 +79,9 @@ __global__ void transform_k(float* preds, size_t n, output_t output, bool complement_proba) { size_t i = threadIdx.x + size_t(blockIdx.x) * blockDim.x; if (i >= n) return; + if (complement_proba && i % 2) return; - float result = preds[complement_proba ? i * 2 : i]; + float result = preds[i]; if ((output & output_t::AVG) != 0) result *= inv_num_trees; result += global_bias; if ((output & output_t::SIGMOID) != 0) result = sigmoid(result); @@ -90,8 +91,8 @@ __global__ void transform_k(float* preds, size_t n, output_t output, // sklearn outputs numpy array in 'C' order, with the number of classes being last dimension // that is also the default order, so we should use the same one if (complement_proba) { - preds[i * 2] = 1.f - result; - preds[i * 2 + 1] = result; + preds[i] = 1.f - result; + preds[i + 1] = result; } else preds[i] = result; } @@ -134,14 +135,22 @@ struct forest { params.data = data; params.num_rows = num_rows; params.max_shm = max_shm_; - params.num_output_classes = - ((num_output_classes_ > 2) || (leaf_payload_type_ == INT_CLASS_LABEL)) - ? num_output_classes_ - : (predict_proba ? 2 : 1); - printf("forest::num_output_classes_ = %d, predict_params.num_output_classes = %d\n", num_output_classes_, params.num_output_classes); + params.num_output_classes = (predict_proba || leaf_payload_type_ == INT_CLASS_LABEL) ? 
num_output_classes_ : 1; + // FLOAT_SCALAR means inference produces 1 class score/component and + // transform_k might complement to 2 for classification, + // if class probabilities are being requested + // assuming predict(..., predict_proba=true) will not get called + // for regression, hence forest::num_output_classes_ == 2 + params.predict_proba = predict_proba; + printf("predict_proba = %s, forest::num_output_classes_ = %d, predict_params.num_output_classes = %d\n", predict_proba ? "true" : "false", num_output_classes_, params.num_output_classes); params.leaf_payload_type = leaf_payload_type_; params.predict_proba = predict_proba; + ASSERT(output_ & output_t::THRESHOLD || num_output_classes_ == 1 || leaf_payload_type_ == INT_CLASS_LABEL, "cannot do two-component regression using FLOAT_SCALAR leaf_payload_type"); + ASSERT(output_ & output_t::THRESHOLD || leaf_payload_type_ == INT_CLASS_LABEL || !predict_proba, "predict_proba does not make sense for regression"); + ASSERT(num_output_classes_ != 1 || !(output_ & output_t::THRESHOLD), "single-class classification does not make sense"); + ASSERT(params.leaf_payload_type != INT_CLASS_LABEL || output_ & output_t::AVG, "need averaging to turn multi-class votes into probabilities"); + // Predict using the forest. cudaStream_t stream = h.getStream(); infer(params, stream); @@ -149,15 +158,14 @@ struct forest { // Transform the output if necessary. if (output_ != output_t::RAW || global_bias_ != 0.0f || predict_proba) { auto output = output_; - if (predict_proba && (leaf_payload_type_ == INT_CLASS_LABEL)) - // because infer(params, stream) will write vote counts - // instead of probabilities - output = output_t(output | output_t::AVG); bool complement_proba = - predict_proba && (leaf_payload_type_ == FLOAT_SCALAR); + predict_proba && leaf_payload_type_ == FLOAT_SCALAR; - transform_k<<>>( - preds, num_rows, output, num_trees_ > 0 ? (1.0f / num_trees_) : 1.0f, + unsigned long values_to_transform = predict_proba ? 
+ (unsigned long) num_rows * (unsigned long) num_output_classes_ : num_rows; + printf("global_bias = %f\n", global_bias_); + transform_k<<>>( + preds, values_to_transform, output, num_trees_ > 0 ? (1.0f / num_trees_) : 1.0f, threshold_, global_bias_, predict_proba, complement_proba); CUDA_CHECK(cudaPeekAtLastError()); } @@ -175,8 +183,8 @@ struct forest { float threshold_ = 0.5; float global_bias_ = 0; // init to invalid - leaf_value_desc_t leaf_payload_type_ = (leaf_value_desc_t)-1; - int num_output_classes_ = INT_MAX; + leaf_value_t leaf_payload_type_ = FLOAT_SCALAR; + int num_output_classes_ = 0; }; struct dense_forest : forest { @@ -292,8 +300,8 @@ void check_params(const forest_params_t* params, bool dense) { "algo should be ALGO_AUTO, NAIVE, TREE_REORG or BATCH_TREE_REORG"); } switch (params->leaf_payload_type) { - case leaf_value_desc_t::FLOAT_SCALAR: - case leaf_value_desc_t::INT_CLASS_LABEL: + case leaf_value_t::FLOAT_SCALAR: + case leaf_value_t::INT_CLASS_LABEL: break; default: ASSERT(false, @@ -306,6 +314,9 @@ void check_params(const forest_params_t* params, bool dense) { ASSERT(false, "output should be a combination of RAW, AVG, SIGMOID and THRESHOLD"); } + ASSERT(params->output & output_t::THRESHOLD || params->num_classes == 1 || params->leaf_payload_type == INT_CLASS_LABEL, "cannot do two-component regression using FLOAT_SCALAR leaf_payload_type"); + ASSERT(params->num_classes != 1 || !(params->output & output_t::THRESHOLD), "single-class classification does not make sense"); + ASSERT(params->leaf_payload_type != INT_CLASS_LABEL || params->output & output_t::AVG, "need averaging to turn multi-class votes into probabilities"); } // tl_node_at is a checked version of tree[i] @@ -382,20 +393,18 @@ void adjust_threshold(float* pthreshold, int* tl_left, int* tl_right, } /** if the vector consists of zeros and a single one, return the position -for the one (assumed class label). Else, return -1. -If the vector contains a NAN, return -1. 
*/ +for the one (assumed class label). Else, asserts false. +If the vector contains a NAN, asserts false */ int find_class_label_from_one_hot(tl::tl_float* vector, int len) { bool found_label = false; - int out = -1; // in case all are 0.f + int out; for (int i = 0; i < len; ++i) - if (vector[i] == 1.f) { - if (!found_label) - out = i; - else // more than one 1.f - return -1; + if (vector[i] == 1.) { + ASSERT(!found_label, "label vector contains multiple 1.f"); + out = i; found_label = true; - } else if (vector[i] != 0.f) // NAN != 0.f - return -1; + } else + ASSERT(vector[i] == 0., "label vector contains values other than 0. and 1."); return out; } @@ -408,8 +417,6 @@ void tl2fil_leaf_payload(fil_node_t* fil_node, const tl::Tree::Node& tl_node, ASSERT(vec.size() == forest_params.num_classes, "inconsistent number of classes in treelite leaves"); fil_node->val.idx = find_class_label_from_one_hot(&vec[0], vec.size()); - ASSERT(fil_node->val.idx != (unsigned)-1, - "a non-empty non-one-hot leaf vector"); break; case FLOAT_SCALAR: fil_node->val.f = tl_node.leaf_value(); @@ -515,13 +522,10 @@ void tl2fil_common(forest_params_t* params, const tl::Model& model, node_key = tl_node_at(tree, node_key).cleft()); auto vec = tl_node_at(tree, node_key).leaf_vector(); if (vec.size()) { - if (find_class_label_from_one_hot(&vec[0], vec.size()) != -1) { - params->num_classes = vec.size(); - ASSERT(vec.size() == model.num_output_group, "treelite model inconsistent"); - params->leaf_payload_type = INT_CLASS_LABEL; - printf("detected %lu-class classification model \n", vec.size()); - } else - ASSERT(false, "unexpected: non-empty non-one-hot leaf vector"); + params->num_classes = vec.size(); + ASSERT(vec.size() == model.num_output_group, "treelite model inconsistent"); + params->leaf_payload_type = INT_CLASS_LABEL; + printf("detected %lu-class classification model \n", vec.size()); } else { params->leaf_payload_type = FLOAT_SCALAR; params->num_classes = tl_params->output_class ? 
2 : 1; @@ -535,7 +539,8 @@ void tl2fil_common(forest_params_t* params, const tl::Model& model, params->global_bias = param.global_bias; params->output = output_t::RAW; if (tl_params->output_class) { - params->output = output_t(params->output | output_t::THRESHOLD); + if (params->leaf_payload_type == FLOAT_SCALAR) + params->output = output_t(params->output | output_t::THRESHOLD); } // "random forest" in treelite means tree output averaging if (model.random_forest_flag) { diff --git a/cpp/src/fil/infer.cu b/cpp/src/fil/infer.cu index 0a98ec6d78..a662629c74 100644 --- a/cpp/src/fil/infer.cu +++ b/cpp/src/fil/infer.cu @@ -26,7 +26,10 @@ using namespace MLCommon; template struct vec { T data[N]; - inline vec() = default; // zeros for numerical member vars + inline __host__ __device__ vec() { +#pragma unroll + for (int i = 0; i < N; ++i) data[i] = 0; + } __host__ __device__ T& operator[](int i) { return data[i]; } __host__ __device__ T operator[](int i) const { return data[i]; } friend __host__ __device__ void operator+=(vec& a, const vec& b) { @@ -65,7 +68,7 @@ __device__ __forceinline__ vec infer_one_tree( } while (mask != 0); vec out; #pragma unroll - for (int j = 0; j < NITEMS; ++j) out[j] = tree[curr[j]].output(); + for (int j = 0; j < NITEMS; ++j) out[j] = tree[curr[j]].base_node::output(); return out; } @@ -82,50 +85,52 @@ __device__ __forceinline__ vec<1, output_type> infer_one_tree(tree_type tree, curr = n.left(curr) + cond; } vec<1, output_type> out; - out[0] = tree[curr].output(); + out[0] = tree[curr].base_node::output(); return out; } template // = float> + leaf_value_t leaf_payload_type> // = FLOAT_SCALAR struct tree_aggregator_t { vec acc; int num_output_classes; __device__ __forceinline__ tree_aggregator_t(int num_output_classes_, void*) : num_output_classes(num_output_classes_) { - // TODO: even if num_output_classes == 2, in regression, this needs to change } - __device__ __forceinline__ void accumulate(vec out) { -#pragma unroll - for (int i = 0; i < 
NITEMS; ++i) acc[i] += out[i].f; + __device__ __forceinline__ void accumulate(vec out) { + acc += out; } - __device__ __forceinline__ void finalize(float* out, int num_rows) { + __device__ __forceinline__ void finalize(float* out, int num_rows, char output_stride) { + __syncthreads(); using BlockReduce = cub::BlockReduce, FIL_TPB>; __shared__ typename BlockReduce::TempStorage tmp_storage; acc = BlockReduce(tmp_storage).Sum(acc); if (threadIdx.x == 0) { for (int i = 0; i < NITEMS; ++i) { int row = blockIdx.x * NITEMS + i; - if (row < num_rows) out[row * num_output_classes] = acc[i]; - //TODO for 2 output values, will need to change the above line - // to fix regression + if (row < num_rows) out[row * output_stride] = acc[i]; } } } + __device__ __forceinline__ void finalize_regression(float* out, int num_rows) { + finalize(out, num_rows, 1); + } + __device__ __forceinline__ void finalize_class_proba(float* out, int num_rows) { + finalize(out, num_rows, 2); + } __device__ __forceinline__ void finalize_class_label(float* out, int num_rows) { - finalize(out, num_rows); + finalize(out, num_rows, 1); } }; template -struct tree_aggregator_t { +struct tree_aggregator_t { typedef unsigned int class_label_t; typedef unsigned int vote_count_t; - // can switch to unsigned short to save shared memory - // provided atomicInc(short*) simulated with atomicAdd with appropriate shifts + // could switch to unsigned short to save shared memory + // provided atomicAdd(short*) simulated with appropriate shifts vote_count_t* votes; int num_output_classes; @@ -138,16 +143,19 @@ struct tree_aggregator_t { for (int i = 0; i < NITEMS; ++i) votes[c * NITEMS + i] = 0; //__syncthreads(); // happening outside } - __device__ __forceinline__ void accumulate(vec out) { + __device__ __forceinline__ void accumulate(vec out) { #pragma unroll for (int i = 0; i < NITEMS; ++i) - atomicAdd(votes + out[i].idx * NITEMS + i, 1); + atomicAdd(votes + out[i] * NITEMS + i, 1); + } + __device__ __forceinline__ 
void finalize_regression(float* out, int num_rows) { + asm("trap;"); } - __device__ __forceinline__ void finalize(float* out, int num_rows) { + __device__ __forceinline__ void finalize_class_proba(float* out, int num_rows) { __syncthreads(); int item = threadIdx.x; int row = blockIdx.x * NITEMS + item; - if ((item < NITEMS) && (row < num_rows)) { + if (item < NITEMS && row < num_rows) { #pragma unroll for (int c = 0; c < num_output_classes; ++c) out[row * num_output_classes + c] = votes[c * NITEMS + item]; @@ -160,7 +168,7 @@ struct tree_aggregator_t { __syncthreads(); int item = threadIdx.x; int row = blockIdx.x * NITEMS + item; - if ((item < NITEMS) && (row < num_rows)) { + if (item < NITEMS && row < num_rows) { vote_count_t max_votes = 0; class_label_t best_class = 0; for (int c = 0; c < num_output_classes; ++c) @@ -173,8 +181,7 @@ struct tree_aggregator_t { } }; -template +template __global__ void infer_k(storage_type forest, predict_params params) { // cache the row for all threads to reuse extern __shared__ char smem[]; @@ -188,7 +195,7 @@ __global__ void infer_k(storage_type forest, predict_params params) { } } - tree_aggregator_t acc( + tree_aggregator_t acc( params.num_output_classes, sdata + params.num_cols * NITEMS); __syncthreads(); // for both row cache init and acc init @@ -197,35 +204,36 @@ __global__ void infer_k(storage_type forest, predict_params params) { // one block works on NITEMS rows and the whole forest for (int j = threadIdx.x; j < forest.num_trees(); j += blockDim.x) { acc.accumulate( - infer_one_tree(forest[j], sdata, params.num_cols)); + infer_one_tree::T> + (forest[j], sdata, params.num_cols)); } // compute most probable class. 
in cuML RF, output is class label, // hence, no-predicted class edge case doesn't apply - if (!params.predict_proba) - acc.finalize_class_label(params.preds, params.num_rows); - else - acc.finalize(params.preds, params.num_rows); + if(false && !threadIdx.x && !blockIdx.x) { + printf("%s\n", params.predict_proba ? "finalize_class_proba" : + (params.num_output_classes > 1 ? "finalize_class_label" : "finalize_regression")); + } + if (!params.predict_proba) { + if (params.num_output_classes > 1) + acc.finalize_class_label(params.preds, params.num_rows); + else + acc.finalize_regression(params.preds, params.num_rows); + } else + acc.finalize_class_proba(params.preds, params.num_rows); } -template +template void infer_k_launcher(storage_type forest, predict_params params, cudaStream_t stream) { - const int MAX_BATCH_ITEMS = 4; + const int MAX_BATCH_ITEMS = 1; //4; TODO: restore params.max_items = params.algo == algo_t::BATCH_TREE_REORG ? MAX_BATCH_ITEMS : 1; - int shared_mem_per_item = sizeof(float) * params.num_cols; - switch (leaf_payload_type) { - case INT_CLASS_LABEL: - // class vote histogram, while inferring trees - shared_mem_per_item += sizeof(int) * params.num_output_classes; - break; - case FLOAT_SCALAR: - // CUB workspace should fit itself, and we don't need - // the row by the time CUB is used - break; - } + int shared_mem_per_item = sizeof(float) * params.num_cols + + // class vote histogram, while inferring trees + (leaf_payload_type == INT_CLASS_LABEL ? 
sizeof(int) * params.num_output_classes : 0); + // CUB workspace should fit itself, and we don't need + // the row by the time CUB is used int num_items = params.max_shm / shared_mem_per_item; if (num_items == 0) { int max_cols = params.max_shm / sizeof(float); @@ -236,21 +244,22 @@ void infer_k_launcher(storage_type forest, predict_params params, num_items = std::min(num_items, params.max_items); int num_blocks = ceildiv(int(params.num_rows), num_items); int shm_sz = num_items * shared_mem_per_item; + std::cout << "num_items " << num_items << " num_blocks " << num_blocks << " shm_sz " << shm_sz << "\n"; switch (num_items) { case 1: - infer_k<1, leaf_payload_type, output_type> + infer_k<1, leaf_payload_type> <<>>(forest, params); break; case 2: - infer_k<2, leaf_payload_type, output_type> + infer_k<2, leaf_payload_type> <<>>(forest, params); break; case 3: - infer_k<3, leaf_payload_type, output_type> + infer_k<3, leaf_payload_type> <<>>(forest, params); break; case 4: - infer_k<4, leaf_payload_type, output_type> + infer_k<4, leaf_payload_type> <<>>(forest, params); break; default: @@ -266,11 +275,11 @@ void infer(storage_type forest, predict_params params, cudaStream_t stream) { case FLOAT_SCALAR: ASSERT(params.num_output_classes <= 2, "wrong leaf payload for multi-class (>2) inference"); - infer_k_launcher(forest, params, + infer_k_launcher(forest, params, stream); break; case INT_CLASS_LABEL: - infer_k_launcher( + infer_k_launcher( forest, params, stream); break; default: diff --git a/cpp/test/sg/fil_test.cu b/cpp/test/sg/fil_test.cu index 2ec478e0a1..2090cb32d9 100644 --- a/cpp/test/sg/fil_test.cu +++ b/cpp/test/sg/fil_test.cu @@ -38,6 +38,7 @@ using namespace MLCommon; namespace tl = treelite; namespace tlf = treelite::frontend; + struct FilTestParams { // input data parameters int num_rows; @@ -57,18 +58,31 @@ struct FilTestParams { float tolerance; // treelite parameters, only used for treelite tests tl::Operator op; - fil::leaf_value_desc_t 
leaf_payload_type; + fil::leaf_value_t leaf_payload_type; // num_classes must be 1 when FLOAT_SCALAR == leaf_payload_type // num_classes must be >1 when INT_CLASS_LABEL == leaf_payload_type // it's used in treelite ModelBuilder initialization int num_classes; }; +std::string output2str(fil::output_t output) { + if(output==fil::RAW) + return "RAW"; + std::string s = ""; + if(output & fil::AVG) + s += "| AVG"; + if(output & fil::THRESHOLD) + s += "| THRESHOLD"; + if(output & fil::SIGMOID) + s += "| SIGMOID"; + return s; +} + std::ostream& operator<<(std::ostream& os, const FilTestParams& ps) { os << "num_rows = " << ps.num_rows << ", num_cols = " << ps.num_cols << ", nan_prob = " << ps.nan_prob << ", depth = " << ps.depth << ", num_trees = " << ps.num_trees << ", leaf_prob = " << ps.leaf_prob - << ", output = " << ps.output << ", threshold = " << ps.threshold + << ", output = " << output2str(ps.output) << ", threshold = " << ps.threshold << ", algo = " << ps.algo << ", seed = " << ps.seed << ", tolerance = " << ps.tolerance << ", op = " << tl::OpName(ps.op) << ", leaf_payload_type = " << ps.leaf_payload_type @@ -84,13 +98,23 @@ __global__ void nan_kernel(float* data, const bool* mask, int len, float nan) { float sigmoid(float x) { return 1.0f / (1.0f + expf(-x)); } +typedef std::vector vote_vec; +vote_vec& operator+=(vote_vec& a, vote_vec b) { + ASSERT(a.size() == b.size(), "trying to add two vectors of different size"); + for(int i=0; i < a.size(); ++i) + a[i] += b[i]; + return a; +} + class BaseFilTest : public testing::TestWithParam { protected: void SetUp() override { // setup + handle = new cumlHandle; ps = testing::TestWithParam::GetParam(); + ps.num_rows = 10; //TODO restore CUDA_CHECK(cudaStreamCreate(&stream)); - handle.setStream(stream); + handle->setStream(stream); generate_forest(); generate_data(); @@ -100,10 +124,14 @@ class BaseFilTest : public testing::TestWithParam { void TearDown() override { CUDA_CHECK(cudaFree(preds_d)); - 
CUDA_CHECK(cudaFree(proba_d)); CUDA_CHECK(cudaFree(want_preds_d)); - CUDA_CHECK(cudaFree(want_proba_d)); CUDA_CHECK(cudaFree(data_d)); + if (ps.num_classes >= 2) { + CUDA_CHECK(cudaFree(want_proba_d)); + CUDA_CHECK(cudaFree(proba_d)); + } + delete handle; + //CUDA_CHECK(cudaDeviceReset()); } void generate_forest() { @@ -161,10 +189,11 @@ class BaseFilTest : public testing::TestWithParam { for (size_t i = 0; i < num_nodes; ++i) { fil::val_t w; switch (ps.leaf_payload_type) { - case fil::leaf_value_desc_t::INT_CLASS_LABEL: - w.idx = (int)(weights_h[i] + 1.0f) % 2; // [0, 1] + case fil::leaf_value_t::INT_CLASS_LABEL: + w.idx = (int)((weights_h[i]*.5 + .5) // [0., 1.] + * ps.num_classes + .5) % ps.num_classes; // [0..num_classes] break; - case fil::leaf_value_desc_t::FLOAT_SCALAR: + case fil::leaf_value_t::FLOAT_SCALAR: w.f = weights_h[i]; } fil::dense_node_init(&nodes[i], w, thresholds_h[i], fids_h[i], @@ -209,29 +238,57 @@ class BaseFilTest : public testing::TestWithParam { void predict_on_cpu() { // predict on host std::vector want_preds_h(ps.num_rows); - std::vector want_proba_h(ps.num_rows * 2); + std::vector want_proba_h(ps.num_rows * ps.num_classes); int num_nodes = tree_num_nodes(); - for (int i = 0; i < ps.num_rows; ++i) { - float pred = 0.0f; - for (int j = 0; j < ps.num_trees; ++j) { - pred += infer_one_tree(&nodes[j * num_nodes], &data_h[i * ps.num_cols]); - } - if ((ps.output & fil::output_t::AVG) != 0) pred = pred / ps.num_trees; - pred += ps.global_bias; - if ((ps.output & fil::output_t::SIGMOID) != 0) pred = sigmoid(pred); - want_proba_h[i * 2] = 1.f - pred; - want_proba_h[i * 2 + 1] = pred; - if ((ps.output & fil::output_t::THRESHOLD) != 0) { - pred = pred > ps.threshold ? 
1.0f : 0.0f; - } - want_preds_h[i] = pred; + switch(ps.leaf_payload_type) { + case fil::leaf_value_t::FLOAT_SCALAR: + for (int i = 0; i < ps.num_rows; ++i) { + float pred = 0.0f; + for (int j = 0; j < ps.num_trees; ++j) { + pred += infer_one_tree(&nodes[j * num_nodes], &data_h[i * ps.num_cols]).f; + } + if ((ps.output & fil::output_t::AVG) != 0) pred = pred * (1.f / ps.num_trees); + pred += ps.global_bias; + if ((ps.output & fil::output_t::SIGMOID) != 0) pred = sigmoid(pred); + if(ps.num_classes == 2) { + want_proba_h[i * 2] = 1.f - pred; + want_proba_h[i * 2 + 1] = pred; + } + if ((ps.output & fil::output_t::THRESHOLD) != 0) { + pred = pred > ps.threshold ? 1.0f : 0.0f; + } + want_preds_h[i] = pred; + } + break; + case fil::leaf_value_t::INT_CLASS_LABEL: + std::vector class_votes(ps.num_classes); + for (int r = 0; r < ps.num_rows; ++r) { + for(auto& v: class_votes) v = 0; + for (int j = 0; j < ps.num_trees; ++j) { + unsigned class_label = infer_one_tree(&nodes[j * num_nodes], &data_h[r * ps.num_cols]).idx; + ++class_votes[class_label]; + } + unsigned best_class = 0, most_votes = 0; + for(int c = 0; c < ps.num_classes; ++c) { + unsigned votes = class_votes[c]; + want_proba_h[r * ps.num_classes + c] = votes * (1.0f / ps.num_trees); + if (votes > most_votes) { + most_votes = votes; + best_class = c; + } + } + want_preds_h[r] = best_class; + } + break; } // copy to GPU allocate(want_preds_d, ps.num_rows); - allocate(want_proba_d, ps.num_rows * 2); updateDevice(want_preds_d, want_preds_h.data(), ps.num_rows, stream); - updateDevice(want_proba_d, want_proba_h.data(), ps.num_rows * 2, stream); + if (ps.num_classes >= 2) { + allocate(want_proba_d, ps.num_rows * ps.num_classes); + updateDevice(want_proba_d, want_proba_h.data(), ps.num_rows * ps.num_classes, stream); + } CUDA_CHECK(cudaStreamSynchronize(stream)); } @@ -243,39 +300,43 @@ class BaseFilTest : public testing::TestWithParam { // predict allocate(preds_d, ps.num_rows); - allocate(proba_d, ps.num_rows * 2); - 
fil::predict(handle, forest, preds_d, data_d, ps.num_rows); - fil::predict(handle, forest, proba_d, data_d, ps.num_rows, true); + fil::predict(*handle, forest, preds_d, data_d, ps.num_rows); + if (ps.num_classes >= 2) { + allocate(proba_d, ps.num_rows * ps.num_classes); + fil::predict(*handle, forest, proba_d, data_d, ps.num_rows, true); + } CUDA_CHECK(cudaStreamSynchronize(stream)); // cleanup - fil::free(handle, forest); + fil::free(*handle, forest); } void compare() { - ASSERT_TRUE(devArrMatch(want_proba_d, proba_d, ps.num_rows * 2, + if (ps.num_classes >= 2) { + std::cout << arr2Str(want_proba_d, 3 * ps.num_classes, "want_proba_d", 0) + << arr2Str(proba_d, 3*ps.num_classes, "proba_d", 0) << std::endl; + ASSERT_TRUE(devArrMatch(want_proba_d, proba_d, ps.num_rows * ps.num_classes, CompareApprox(ps.tolerance), stream)); + } + float tolerance = ps.leaf_payload_type == fil::leaf_value_t::FLOAT_SCALAR ? ps.tolerance : std::numeric_limits::epsilon(); + // in multi-class prediction, floats represent the most likely class + // and would be generated by converting an int to float + std::cout << arr2Str(want_preds_d, 10, "want_preds_d", 0) + << arr2Str(preds_d, 10, "preds_d", 0) << std::endl; ASSERT_TRUE(devArrMatch(want_preds_d, preds_d, ps.num_rows, - CompareApprox(ps.tolerance), stream)); + CompareApprox(tolerance), stream)); } - float infer_one_tree(fil::dense_node_t* root, float* data) { + fil::val_t infer_one_tree(fil::dense_node_t* root, float* data) { int curr = 0; - float output = 0.0f, threshold = 0.0f; + float threshold = 0.0f; + fil::val_t output{.f = 0.0f}; int fid = 0; bool def_left = false, is_leaf = false; for (;;) { - fil::val_t w; - fil::dense_node_decode(&root[curr], &w, &threshold, &fid, &def_left, + fil::dense_node_decode(&root[curr], &output, &threshold, &fid, &def_left, &is_leaf); if (is_leaf) { - switch (ps.leaf_payload_type) { - case fil::leaf_value_desc_t::INT_CLASS_LABEL: - output = w.idx; - break; - case fil::leaf_value_desc_t::FLOAT_SCALAR: - 
output = w.f; - } break; } float val = data[fid]; @@ -304,7 +365,7 @@ class BaseFilTest : public testing::TestWithParam { // parameters cudaStream_t stream; - cumlHandle handle; + cumlHandle* handle; FilTestParams ps; }; @@ -321,7 +382,8 @@ class PredictDenseFilTest : public BaseFilTest { fil_ps.threshold = ps.threshold; fil_ps.global_bias = ps.global_bias; fil_ps.leaf_payload_type = ps.leaf_payload_type; - fil::init_dense(handle, pforest, nodes.data(), &fil_ps); + fil_ps.num_classes = ps.num_classes; + fil::init_dense(*handle, pforest, nodes.data(), &fil_ps); } }; @@ -376,12 +438,10 @@ class PredictSparseFilTest : public BaseFilTest { fil_params.threshold = ps.threshold; fil_params.global_bias = ps.global_bias; fil_params.leaf_payload_type = ps.leaf_payload_type; - fil_params.num_classes = - (ps.num_classes == 1 && ps.output & fil::output_t::THRESHOLD) ? 2 - : ps.num_classes; + fil_params.num_classes = ps.num_classes; dense2sparse(); fil_params.num_nodes = sparse_nodes.size(); - fil::init_sparse(handle, pforest, trees.data(), sparse_nodes.data(), + fil::init_sparse(*handle, pforest, trees.data(), sparse_nodes.data(), &fil_params); } std::vector sparse_nodes; @@ -405,11 +465,11 @@ class TreeliteFilTest : public BaseFilTest { &default_left, &is_leaf); if (is_leaf) { switch (ps.leaf_payload_type) { - case fil::leaf_value_desc_t::FLOAT_SCALAR: + case fil::leaf_value_t::FLOAT_SCALAR: // default is fil::FLOAT_SCALAR TL_CPP_CHECK(builder->SetLeafNode(key, output.f)); break; - case fil::leaf_value_desc_t::INT_CLASS_LABEL: + case fil::leaf_value_t::INT_CLASS_LABEL: std::vector vec(ps.num_classes); vec[output.idx] = 1.; TL_CPP_CHECK(builder->SetLeafVectorNode(key, vec)); @@ -447,14 +507,12 @@ class TreeliteFilTest : public BaseFilTest { void init_forest_impl(fil::forest_t* pforest, fil::storage_type_t storage_type) { - bool random_forest_flag = - (ps.output & fil::output_t::AVG) && - // TODO: why does ModelBuilder(num_cols, 1, true) break on FLOAT_SCALAR? 
- (ps.leaf_payload_type == fil::leaf_value_desc_t::INT_CLASS_LABEL); - printf("%s && %s == %s\n", (ps.output & fil::output_t::AVG) ? "at" : "af", (ps.leaf_payload_type == fil::leaf_value_desc_t::INT_CLASS_LABEL) ? "pt" : "pf", random_forest_flag ? "ft" : "ff"); + bool random_forest_flag = (ps.output & fil::output_t::AVG) != 0; + printf("%s && %s == %s\n", (ps.output & fil::output_t::AVG) ? "at" : "af", (ps.leaf_payload_type == fil::leaf_value_t::INT_CLASS_LABEL) ? "pt" : "pf", random_forest_flag ? "ft" : "ff"); + int treelite_num_classes = ps.leaf_payload_type == fil::leaf_value_t::FLOAT_SCALAR ? 1 : ps.num_classes; std::unique_ptr model_builder( - new tlf::ModelBuilder(ps.num_cols, ps.num_classes, random_forest_flag)); - printf("ModelBuilder(num_cols, num_classes = %d, random_forest_flag = %s)\n", ps.num_classes, random_forest_flag ? "true" : "false"); + new tlf::ModelBuilder(ps.num_cols, treelite_num_classes, random_forest_flag)); + printf("ModelBuilder(num_cols, num_classes = %d, random_forest_flag = %s)\n", treelite_num_classes, random_forest_flag ? 
"true" : "false"); // prediction transform if ((ps.output & fil::output_t::SIGMOID) != 0) { @@ -489,7 +547,7 @@ class TreeliteFilTest : public BaseFilTest { params.threshold = ps.threshold; params.output_class = (ps.output & fil::output_t::THRESHOLD) != 0; params.storage_type = storage_type; - fil::from_treelite(handle, pforest, (ModelHandle)model.get(), ¶ms); + fil::from_treelite(*handle, pforest, (ModelHandle)model.get(), ¶ms); CUDA_CHECK(cudaStreamSynchronize(stream)); } }; @@ -518,70 +576,70 @@ class TreeliteAutoFilTest : public TreeliteFilTest { // rows, cols, nan_prob, depth, num_trees, leaf_prob, output, threshold, // global_bias, algo, seed, tolerance std::vector predict_dense_inputs = { + {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, + fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator::kEQ, + fil::leaf_value_t::FLOAT_SCALAR, 1}, // 00 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0, fil::algo_t::NAIVE, - 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, + 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR, 1}, // 01 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0, fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, + fil::leaf_value_t::FLOAT_SCALAR, 1}, // 02 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0, fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, + fil::leaf_value_t::FLOAT_SCALAR, 1}, // 03 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, + fil::leaf_value_t::FLOAT_SCALAR, 1}, // 04 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, + fil::leaf_value_t::FLOAT_SCALAR, 1}, // 05 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, fil::algo_t::BATCH_TREE_REORG, 
42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, + fil::leaf_value_t::FLOAT_SCALAR, 1}, // 06 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::SIGMOID | fil::output_t::THRESHOLD), 0, 0, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, + fil::leaf_value_t::FLOAT_SCALAR, 2}, // 07 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::SIGMOID | fil::output_t::THRESHOLD), 0, 0, fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, + fil::leaf_value_t::FLOAT_SCALAR, 2}, // 08 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::SIGMOID | fil::output_t::THRESHOLD), 0, 0, fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, + fil::leaf_value_t::FLOAT_SCALAR, 2}, // 09 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::NAIVE, - 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, + 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR, 1}, // {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, + fil::leaf_value_t::FLOAT_SCALAR, 1}, // 10 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, + fil::leaf_value_t::FLOAT_SCALAR, 1}, // 11 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::THRESHOLD), 0, 0, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, + fil::leaf_value_t::FLOAT_SCALAR, 2}, // 12 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::THRESHOLD), 0, 0, fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, + fil::leaf_value_t::FLOAT_SCALAR, 2}, // 13 {20000, 50, 0.05, 8, 50, 
0.05, fil::output_t(fil::output_t::AVG | fil::output_t::THRESHOLD), 0, 0, fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, + fil::leaf_value_t::FLOAT_SCALAR, 2}, // 14 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0.5, fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, + fil::leaf_value_t::FLOAT_SCALAR, 1}, // 15 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0.5, fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, + fil::leaf_value_t::FLOAT_SCALAR, 1}, // 16 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0.5, fil::algo_t::NAIVE, - 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, + 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR, 1}, // {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::THRESHOLD), 1.0, 0.5, fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, - {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, - fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, + fil::leaf_value_t::FLOAT_SCALAR, 2}, // 17 }; TEST_P(PredictDenseFilTest, Predict) { compare(); } @@ -593,35 +651,35 @@ INSTANTIATE_TEST_CASE_P(FilTests, PredictDenseFilTest, // global_bias, algo, seed, tolerance std::vector predict_sparse_inputs = { {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0, fil::algo_t::NAIVE, - 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, + 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR, 1}, // {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, + fil::leaf_value_t::FLOAT_SCALAR, 1}, // 00 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::SIGMOID | 
fil::output_t::THRESHOLD), 0, 0, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, + fil::leaf_value_t::FLOAT_SCALAR, 2}, // 01 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::NAIVE, - 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, + 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR, 1}, // {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::THRESHOLD), 0, 0.5, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, + fil::leaf_value_t::FLOAT_SCALAR, 2}, // 02 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0.5, fil::algo_t::NAIVE, - 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, + 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR, 1}, // {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0.5, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, + fil::leaf_value_t::FLOAT_SCALAR, 1}, // 03 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0.5, fil::algo_t::NAIVE, - 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, + 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR, 1}, // {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::THRESHOLD), 1.0, 0.5, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, + fil::leaf_value_t::FLOAT_SCALAR, 2}, // 04 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::SIGMOID | fil::output_t::THRESHOLD), 0, 0, fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, + fil::leaf_value_t::FLOAT_SCALAR, 2}, // 05 }; TEST_P(PredictSparseFilTest, Predict) { compare(); } @@ -632,105 +690,105 @@ INSTANTIATE_TEST_CASE_P(FilTests, PredictSparseFilTest, // rows, cols, nan_prob, depth, num_trees, leaf_prob, output, threshold, // global_bias, 
algo, seed, tolerance std::vector import_dense_inputs = { + {20000, 50, 0.05, 8, 50, 0.05, + fil::output_t(fil::output_t::SIGMOID | fil::output_t::AVG), 0, 0, + fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator::kLE, + fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 00 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0, fil::algo_t::NAIVE, - 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, + 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::FLOAT_SCALAR, 1}, // 01 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::SIGMOID | fil::output_t::AVG), 0, 0, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kLE, - fil::leaf_value_desc_t::INT_CLASS_LABEL, 4}, + fil::leaf_value_t::INT_CLASS_LABEL, 4}, // 02 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::SIGMOID | fil::output_t::THRESHOLD), 0, 0, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kGT, - fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, + fil::leaf_value_t::FLOAT_SCALAR, 2}, // 03 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::NAIVE, - 42, 2e-3f, tl::Operator::kGE, fil::leaf_value_desc_t::INT_CLASS_LABEL, 2}, + 42, 2e-3f, tl::Operator::kGE, fil::leaf_value_t::INT_CLASS_LABEL, 2}, // {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::THRESHOLD), 0, 0, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kLT, - fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, + fil::leaf_value_t::FLOAT_SCALAR, 2}, // 04 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kLE, - fil::leaf_value_desc_t::INT_CLASS_LABEL, 2}, + fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 05 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::SIGMOID | fil::output_t::AVG), 0, 0, fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kGT, - fil::leaf_value_desc_t::INT_CLASS_LABEL, 2}, + fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 06 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::SIGMOID | fil::output_t::THRESHOLD | 
fil::output_t::AVG), 0, 0, fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kGE, - fil::leaf_value_desc_t::INT_CLASS_LABEL, 2}, + fil::leaf_value_t::INT_CLASS_LABEL, 2}, // {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kLT, - fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, + fil::leaf_value_t::FLOAT_SCALAR, 1}, // 07 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::THRESHOLD), 0, 0, fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kLE, - fil::leaf_value_desc_t::INT_CLASS_LABEL, 3}, + fil::leaf_value_t::INT_CLASS_LABEL, 3}, // 08 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0, fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLT, - fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, + fil::leaf_value_t::FLOAT_SCALAR, 1}, // 09 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::SIGMOID | fil::output_t::AVG), 0, 0, fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLT, - fil::leaf_value_desc_t::INT_CLASS_LABEL, 2}, + fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 10 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLE, - fil::leaf_value_desc_t::INT_CLASS_LABEL, 2}, + fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 11 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLE, - fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, + fil::leaf_value_t::FLOAT_SCALAR, 1}, // 12 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0, fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kGT, - fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, + fil::leaf_value_t::FLOAT_SCALAR, 1}, // 13 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::SIGMOID | fil::output_t::AVG), 0, 0, fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kGT, - fil::leaf_value_desc_t::INT_CLASS_LABEL, 2}, + fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 14 {20000, 50, 0.05, 8, 50, 0.05, 
fil::output_t::AVG, 0, 0, fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kGE, - fil::leaf_value_desc_t::INT_CLASS_LABEL, 2}, + fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 15 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kGE, - fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, + fil::leaf_value_t::FLOAT_SCALAR, 1}, // 16 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::SIGMOID | fil::output_t::THRESHOLD | fil::output_t::AVG), 0, 0, fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLT, - fil::leaf_value_desc_t::INT_CLASS_LABEL, 2}, + fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 17 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::SIGMOID | fil::output_t::THRESHOLD | fil::output_t::AVG), 0, 0, fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLE, - fil::leaf_value_desc_t::INT_CLASS_LABEL, 2}, + fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 18 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLT, - fil::leaf_value_desc_t::INT_CLASS_LABEL, 2}, + fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 19 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLE, - fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, + fil::leaf_value_t::FLOAT_SCALAR, 1}, // 20 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::THRESHOLD), 0, 0, fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kGT, - fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, + fil::leaf_value_t::FLOAT_SCALAR, 2}, // 21 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::THRESHOLD), 0, 0, fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kGE, - fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, + fil::leaf_value_t::FLOAT_SCALAR, 2}, // 22 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0.5, fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kLT, - 
fil::leaf_value_desc_t::INT_CLASS_LABEL, 2}, + fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 23 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::SIGMOID | fil::output_t::AVG), 0, 0.5, fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLE, - fil::leaf_value_desc_t::INT_CLASS_LABEL, 2}, + fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 24 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0.5, fil::algo_t::NAIVE, - 42, 2e-3f, tl::Operator::kGT, fil::leaf_value_desc_t::INT_CLASS_LABEL, 3}, + 42, 2e-3f, tl::Operator::kGT, fil::leaf_value_t::INT_CLASS_LABEL, 3}, // 25 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::THRESHOLD), 1.0, 0.5, fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kGE, - fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, - {20000, 50, 0.05, 8, 50, 0.05, - fil::output_t(fil::output_t::SIGMOID | fil::output_t::AVG), 0, 0, - fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator::kLE, - fil::leaf_value_desc_t::INT_CLASS_LABEL, 2}, + fil::leaf_value_t::FLOAT_SCALAR, 2}, // 26 }; TEST_P(TreeliteDenseFilTest, Import) { compare(); } @@ -741,35 +799,35 @@ INSTANTIATE_TEST_CASE_P(FilTests, TreeliteDenseFilTest, // rows, cols, nan_prob, depth, num_trees, leaf_prob, output, threshold, // global_bias, algo, seed, tolerance std::vector import_sparse_inputs = { + {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, + fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator::kLT, + fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 00 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::NAIVE, - 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_desc_t::INT_CLASS_LABEL, 2}, + 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 01 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kLE, - fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, + fil::leaf_value_t::FLOAT_SCALAR, 1}, // 02 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::SIGMOID | 
fil::output_t::THRESHOLD), 0, 0, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kGT, - fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, + fil::leaf_value_t::FLOAT_SCALAR, 2}, // 03 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::NAIVE, - 42, 2e-3f, tl::Operator::kGE, fil::leaf_value_desc_t::INT_CLASS_LABEL, 2}, + 42, 2e-3f, tl::Operator::kGE, fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 04 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::THRESHOLD), 0, 0, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kLT, - fil::leaf_value_desc_t::INT_CLASS_LABEL, 2}, + fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 05 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0.5, fil::algo_t::NAIVE, - 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_desc_t::INT_CLASS_LABEL, 2}, + 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 06 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0.5, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kLE, - fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, + fil::leaf_value_t::FLOAT_SCALAR, 1}, // 07 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0.5, fil::algo_t::NAIVE, - 42, 2e-3f, tl::Operator::kGT, fil::leaf_value_desc_t::INT_CLASS_LABEL, 3}, + 42, 2e-3f, tl::Operator::kGT, fil::leaf_value_t::INT_CLASS_LABEL, 3}, // 08 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::THRESHOLD), 1.0, 0.5, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kGE, - fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, - {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, - fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator::kLT, - fil::leaf_value_desc_t::INT_CLASS_LABEL, 2}, + fil::leaf_value_t::FLOAT_SCALAR, 2}, // 09 }; TEST_P(TreeliteSparseFilTest, Import) { compare(); } @@ -782,16 +840,16 @@ INSTANTIATE_TEST_CASE_P(FilTests, TreeliteSparseFilTest, std::vector import_auto_inputs = { {20000, 50, 0.05, 10, 50, 0.05, fil::output_t::RAW, 0, 0, fil::algo_t::ALGO_AUTO, 42, 2e-3f, 
tl::Operator::kLT, - fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, + fil::leaf_value_t::FLOAT_SCALAR, 1}, // 00 {20000, 50, 0.05, 15, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator::kLT, - fil::leaf_value_desc_t::INT_CLASS_LABEL, 2}, + fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 01 {20000, 50, 0.05, 19, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator::kLT, - fil::leaf_value_desc_t::INT_CLASS_LABEL, 2}, + fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 02 {20000, 50, 0.05, 19, 50, 0.05, fil::output_t::RAW, 0, 0, fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLT, - fil::leaf_value_desc_t::FLOAT_SCALAR, 1}, + fil::leaf_value_t::FLOAT_SCALAR, 1}, // 03 }; TEST_P(TreeliteAutoFilTest, Import) { compare(); } From ed0eb71cb5b6e78d5b8c71433595364cc5ab230a Mon Sep 17 00:00:00 2001 From: Levs Dolgovs Date: Thu, 5 Mar 2020 22:14:56 -0800 Subject: [PATCH 040/330] fixed all C++ test bugs --- cpp/src/fil/fil.cu | 13 +++++++------ cpp/src/fil/infer.cu | 2 +- cpp/test/sg/fil_test.cu | 29 +++++++++++++++-------------- 3 files changed, 23 insertions(+), 21 deletions(-) diff --git a/cpp/src/fil/fil.cu b/cpp/src/fil/fil.cu index 62b3b6334a..a1a4fe6d17 100644 --- a/cpp/src/fil/fil.cu +++ b/cpp/src/fil/fil.cu @@ -146,18 +146,17 @@ struct forest { params.leaf_payload_type = leaf_payload_type_; params.predict_proba = predict_proba; - ASSERT(output_ & output_t::THRESHOLD || num_output_classes_ == 1 || leaf_payload_type_ == INT_CLASS_LABEL, "cannot do two-component regression using FLOAT_SCALAR leaf_payload_type"); ASSERT(output_ & output_t::THRESHOLD || leaf_payload_type_ == INT_CLASS_LABEL || !predict_proba, "predict_proba does not make sense for regression"); - ASSERT(num_output_classes_ != 1 || !(output_ & output_t::THRESHOLD), "single-class classification does not make sense"); - ASSERT(params.leaf_payload_type != INT_CLASS_LABEL || output_ & output_t::AVG, "need averaging to turn multi-class votes into 
probabilities"); // Predict using the forest. cudaStream_t stream = h.getStream(); infer(params, stream); // Transform the output if necessary. - if (output_ != output_t::RAW || global_bias_ != 0.0f || predict_proba) { - auto output = output_; + output_t ot = output_; + if(leaf_payload_type_ == INT_CLASS_LABEL && !predict_proba) + ot = output_t(ot & ~output_t::AVG); // don't "average" class labels + if (ot != output_t::RAW || global_bias_ != 0.0f || predict_proba) { bool complement_proba = predict_proba && leaf_payload_type_ == FLOAT_SCALAR; @@ -165,7 +164,7 @@ struct forest { (unsigned long) num_rows * (unsigned long) num_output_classes_ : num_rows; printf("global_bias = %f\n", global_bias_); transform_k<<>>( - preds, values_to_transform, output, num_trees_ > 0 ? (1.0f / num_trees_) : 1.0f, + preds, values_to_transform, ot, num_trees_ > 0 ? (1.0f / num_trees_) : 1.0f, threshold_, global_bias_, predict_proba, complement_proba); CUDA_CHECK(cudaPeekAtLastError()); } @@ -317,6 +316,8 @@ void check_params(const forest_params_t* params, bool dense) { ASSERT(params->output & output_t::THRESHOLD || params->num_classes == 1 || params->leaf_payload_type == INT_CLASS_LABEL, "cannot do two-component regression using FLOAT_SCALAR leaf_payload_type"); ASSERT(params->num_classes != 1 || !(params->output & output_t::THRESHOLD), "single-class classification does not make sense"); ASSERT(params->leaf_payload_type != INT_CLASS_LABEL || params->output & output_t::AVG, "need averaging to turn multi-class votes into probabilities"); + ASSERT(params->leaf_payload_type != INT_CLASS_LABEL || !(params->output & output_t::SIGMOID), "SIGMOID does not make sense for class-vote-based classification"); + ASSERT(params->leaf_payload_type != INT_CLASS_LABEL || !params->global_bias, "global_bias does not make sense for class-vote-based classification"); } // tl_node_at is a checked version of tree[i] diff --git a/cpp/src/fil/infer.cu b/cpp/src/fil/infer.cu index a662629c74..0f3503e1d0 100644 --- 
a/cpp/src/fil/infer.cu +++ b/cpp/src/fil/infer.cu @@ -225,7 +225,7 @@ __global__ void infer_k(storage_type forest, predict_params params) { template void infer_k_launcher(storage_type forest, predict_params params, cudaStream_t stream) { - const int MAX_BATCH_ITEMS = 1; //4; TODO: restore + const int MAX_BATCH_ITEMS = 4; params.max_items = params.algo == algo_t::BATCH_TREE_REORG ? MAX_BATCH_ITEMS : 1; diff --git a/cpp/test/sg/fil_test.cu b/cpp/test/sg/fil_test.cu index 2090cb32d9..3e1cdeeef6 100644 --- a/cpp/test/sg/fil_test.cu +++ b/cpp/test/sg/fil_test.cu @@ -85,6 +85,7 @@ std::ostream& operator<<(std::ostream& os, const FilTestParams& ps) { << ", output = " << output2str(ps.output) << ", threshold = " << ps.threshold << ", algo = " << ps.algo << ", seed = " << ps.seed << ", tolerance = " << ps.tolerance << ", op = " << tl::OpName(ps.op) + << ", global_bias = " << ps.global_bias << ", leaf_payload_type = " << ps.leaf_payload_type << ", num_classes = " << ps.num_classes; return os; @@ -112,7 +113,7 @@ class BaseFilTest : public testing::TestWithParam { // setup handle = new cumlHandle; ps = testing::TestWithParam::GetParam(); - ps.num_rows = 10; //TODO restore + //ps.num_rows = 10; //TODO restore CUDA_CHECK(cudaStreamCreate(&stream)); handle->setStream(stream); @@ -691,13 +692,13 @@ INSTANTIATE_TEST_CASE_P(FilTests, PredictSparseFilTest, // global_bias, algo, seed, tolerance std::vector import_dense_inputs = { {20000, 50, 0.05, 8, 50, 0.05, - fil::output_t(fil::output_t::SIGMOID | fil::output_t::AVG), 0, 0, + fil::output_t::AVG, 0, 0, fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator::kLE, fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 00 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::FLOAT_SCALAR, 1}, // 01 {20000, 50, 0.05, 8, 50, 0.05, - fil::output_t(fil::output_t::SIGMOID | fil::output_t::AVG), 0, 0, + fil::output_t::AVG, 0, 0, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kLE, 
fil::leaf_value_t::INT_CLASS_LABEL, 4}, // 02 {20000, 50, 0.05, 8, 50, 0.05, @@ -714,11 +715,11 @@ std::vector import_dense_inputs = { fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kLE, fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 05 {20000, 50, 0.05, 8, 50, 0.05, - fil::output_t(fil::output_t::SIGMOID | fil::output_t::AVG), 0, 0, + fil::output_t::AVG, 0, 0, fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kGT, fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 06 {20000, 50, 0.05, 8, 50, 0.05, - fil::output_t(fil::output_t::SIGMOID | fil::output_t::THRESHOLD | fil::output_t::AVG), 0, 0, + fil::output_t(fil::output_t::THRESHOLD | fil::output_t::AVG), 0, 0, fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kGE, fil::leaf_value_t::INT_CLASS_LABEL, 2}, // {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, @@ -732,7 +733,7 @@ std::vector import_dense_inputs = { fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::FLOAT_SCALAR, 1}, // 09 {20000, 50, 0.05, 8, 50, 0.05, - fil::output_t(fil::output_t::SIGMOID | fil::output_t::AVG), 0, 0, + fil::output_t::AVG, 0, 0, fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 10 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, @@ -745,7 +746,7 @@ std::vector import_dense_inputs = { fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kGT, fil::leaf_value_t::FLOAT_SCALAR, 1}, // 13 {20000, 50, 0.05, 8, 50, 0.05, - fil::output_t(fil::output_t::SIGMOID | fil::output_t::AVG), 0, 0, + fil::output_t::AVG, 0, 0, fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kGT, fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 14 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, @@ -755,11 +756,11 @@ std::vector import_dense_inputs = { fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kGE, fil::leaf_value_t::FLOAT_SCALAR, 1}, // 16 {20000, 50, 0.05, 8, 50, 0.05, - fil::output_t(fil::output_t::SIGMOID | fil::output_t::THRESHOLD | fil::output_t::AVG), 
0, 0, + fil::output_t(fil::output_t::THRESHOLD | fil::output_t::AVG), 0, 0, fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 17 {20000, 50, 0.05, 8, 50, 0.05, - fil::output_t(fil::output_t::SIGMOID | fil::output_t::THRESHOLD | fil::output_t::AVG), 0, 0, + fil::output_t(fil::output_t::THRESHOLD | fil::output_t::AVG), 0, 0, fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLE, fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 18 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, @@ -776,14 +777,14 @@ std::vector import_dense_inputs = { fil::output_t(fil::output_t::AVG | fil::output_t::THRESHOLD), 0, 0, fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kGE, fil::leaf_value_t::FLOAT_SCALAR, 2}, // 22 - {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0.5, + {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 23 {20000, 50, 0.05, 8, 50, 0.05, - fil::output_t(fil::output_t::SIGMOID | fil::output_t::AVG), 0, 0.5, + fil::output_t::AVG, 0, 0, fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLE, fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 24 - {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0.5, fil::algo_t::NAIVE, + {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kGT, fil::leaf_value_t::INT_CLASS_LABEL, 3}, // 25 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::THRESHOLD), 1.0, 0.5, @@ -817,12 +818,12 @@ std::vector import_sparse_inputs = { fil::output_t(fil::output_t::AVG | fil::output_t::THRESHOLD), 0, 0, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 05 - {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0.5, fil::algo_t::NAIVE, + {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kLT, 
fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 06 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0.5, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kLE, fil::leaf_value_t::FLOAT_SCALAR, 1}, // 07 - {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0.5, fil::algo_t::NAIVE, + {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kGT, fil::leaf_value_t::INT_CLASS_LABEL, 3}, // 08 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::THRESHOLD), 1.0, 0.5, From 5012bdbd1fa68e91410f1c8f6a5510f9bcf9dcb8 Mon Sep 17 00:00:00 2001 From: Levs Dolgovs Date: Thu, 5 Mar 2020 22:31:31 -0800 Subject: [PATCH 041/330] removed C++ fil_test.cu debug rigging --- cpp/test/sg/fil_test.cu | 27 ++++++++++----------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/cpp/test/sg/fil_test.cu b/cpp/test/sg/fil_test.cu index 3e1cdeeef6..0da0afb3c9 100644 --- a/cpp/test/sg/fil_test.cu +++ b/cpp/test/sg/fil_test.cu @@ -59,7 +59,8 @@ struct FilTestParams { // treelite parameters, only used for treelite tests tl::Operator op; fil::leaf_value_t leaf_payload_type; - // num_classes must be 1 when FLOAT_SCALAR == leaf_payload_type + // num_classes must be 1 or 2 when FLOAT_SCALAR == leaf_payload_type + // (1 if it's regression) // num_classes must be >1 when INT_CLASS_LABEL == leaf_payload_type // it's used in treelite ModelBuilder initialization int num_classes; @@ -111,11 +112,9 @@ class BaseFilTest : public testing::TestWithParam { protected: void SetUp() override { // setup - handle = new cumlHandle; ps = testing::TestWithParam::GetParam(); - //ps.num_rows = 10; //TODO restore CUDA_CHECK(cudaStreamCreate(&stream)); - handle->setStream(stream); + handle.setStream(stream); generate_forest(); generate_data(); @@ -131,8 +130,6 @@ class BaseFilTest : public testing::TestWithParam { CUDA_CHECK(cudaFree(want_proba_d)); CUDA_CHECK(cudaFree(proba_d)); } - delete handle; - //CUDA_CHECK(cudaDeviceReset()); 
} void generate_forest() { @@ -301,29 +298,25 @@ class BaseFilTest : public testing::TestWithParam { // predict allocate(preds_d, ps.num_rows); - fil::predict(*handle, forest, preds_d, data_d, ps.num_rows); + fil::predict(handle, forest, preds_d, data_d, ps.num_rows); if (ps.num_classes >= 2) { allocate(proba_d, ps.num_rows * ps.num_classes); - fil::predict(*handle, forest, proba_d, data_d, ps.num_rows, true); + fil::predict(handle, forest, proba_d, data_d, ps.num_rows, true); } CUDA_CHECK(cudaStreamSynchronize(stream)); // cleanup - fil::free(*handle, forest); + fil::free(handle, forest); } void compare() { if (ps.num_classes >= 2) { - std::cout << arr2Str(want_proba_d, 3 * ps.num_classes, "want_proba_d", 0) - << arr2Str(proba_d, 3*ps.num_classes, "proba_d", 0) << std::endl; ASSERT_TRUE(devArrMatch(want_proba_d, proba_d, ps.num_rows * ps.num_classes, CompareApprox(ps.tolerance), stream)); } float tolerance = ps.leaf_payload_type == fil::leaf_value_t::FLOAT_SCALAR ? ps.tolerance : std::numeric_limits::epsilon(); // in multi-class prediction, floats represent the most likely class // and would be generated by converting an int to float - std::cout << arr2Str(want_preds_d, 10, "want_preds_d", 0) - << arr2Str(preds_d, 10, "preds_d", 0) << std::endl; ASSERT_TRUE(devArrMatch(want_preds_d, preds_d, ps.num_rows, CompareApprox(tolerance), stream)); } @@ -366,7 +359,7 @@ class BaseFilTest : public testing::TestWithParam { // parameters cudaStream_t stream; - cumlHandle* handle; + cumlHandle handle; FilTestParams ps; }; @@ -384,7 +377,7 @@ class PredictDenseFilTest : public BaseFilTest { fil_ps.global_bias = ps.global_bias; fil_ps.leaf_payload_type = ps.leaf_payload_type; fil_ps.num_classes = ps.num_classes; - fil::init_dense(*handle, pforest, nodes.data(), &fil_ps); + fil::init_dense(handle, pforest, nodes.data(), &fil_ps); } }; @@ -442,7 +435,7 @@ class PredictSparseFilTest : public BaseFilTest { fil_params.num_classes = ps.num_classes; dense2sparse(); fil_params.num_nodes 
= sparse_nodes.size(); - fil::init_sparse(*handle, pforest, trees.data(), sparse_nodes.data(), + fil::init_sparse(handle, pforest, trees.data(), sparse_nodes.data(), &fil_params); } std::vector sparse_nodes; @@ -548,7 +541,7 @@ class TreeliteFilTest : public BaseFilTest { params.threshold = ps.threshold; params.output_class = (ps.output & fil::output_t::THRESHOLD) != 0; params.storage_type = storage_type; - fil::from_treelite(*handle, pforest, (ModelHandle)model.get(), ¶ms); + fil::from_treelite(handle, pforest, (ModelHandle)model.get(), ¶ms); CUDA_CHECK(cudaStreamSynchronize(stream)); } }; From fbcc0a73e027854cf75539094e29690702a794aa Mon Sep 17 00:00:00 2001 From: Levs Dolgovs Date: Thu, 5 Mar 2020 22:39:58 -0800 Subject: [PATCH 042/330] all python tests are called --- cpp/src/fil/fil.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/fil/fil.cu b/cpp/src/fil/fil.cu index a1a4fe6d17..bd0387fd4d 100644 --- a/cpp/src/fil/fil.cu +++ b/cpp/src/fil/fil.cu @@ -146,7 +146,7 @@ struct forest { params.leaf_payload_type = leaf_payload_type_; params.predict_proba = predict_proba; - ASSERT(output_ & output_t::THRESHOLD || leaf_payload_type_ == INT_CLASS_LABEL || !predict_proba, "predict_proba does not make sense for regression"); + ASSERT(output_ & (output_t::THRESHOLD | output_t::SIGMOID) || leaf_payload_type_ == INT_CLASS_LABEL || !predict_proba, "predict_proba does not make sense for regression"); // Predict using the forest. 
cudaStream_t stream = h.getStream(); From ca862fce099155394d7c29e257f52e2a49ede208 Mon Sep 17 00:00:00 2001 From: Levs Dolgovs Date: Fri, 6 Mar 2020 00:02:00 -0800 Subject: [PATCH 043/330] fix formatting, re-add inlining --- cpp/include/cuml/fil/fil.h | 15 +- cpp/src/fil/common.cuh | 16 +- cpp/src/fil/fil.cu | 77 ++++++--- cpp/src/fil/infer.cu | 64 +++---- cpp/src/randomforest/randomforest.cu | 27 +++ cpp/test/sg/fil_test.cu | 247 ++++++++++++++------------- python/cuml/test/test_fil.py | 3 +- 7 files changed, 263 insertions(+), 186 deletions(-) diff --git a/cpp/include/cuml/fil/fil.h b/cpp/include/cuml/fil/fil.h index 7aeb0293fe..daa292c232 100644 --- a/cpp/include/cuml/fil/fil.h +++ b/cpp/include/cuml/fil/fil.h @@ -120,13 +120,16 @@ enum leaf_value_t { // to be extended }; -template +template struct leaf_output_t {}; -template<> -struct leaf_output_t { typedef float T;}; -template<> -struct leaf_output_t { typedef unsigned T;}; - +template <> +struct leaf_output_t { + typedef float T; +}; +template <> +struct leaf_output_t { + typedef unsigned T; +}; /** dense_node_init initializes node from paramters */ void dense_node_init(dense_node_t* n, val_t output, float thresh, int fid, diff --git a/cpp/src/fil/common.cuh b/cpp/src/fil/common.cuh index e20087f0c6..8a44eeab7b 100644 --- a/cpp/src/fil/common.cuh +++ b/cpp/src/fil/common.cuh @@ -47,7 +47,10 @@ struct base_node : dense_node_t { static const int FID_MASK = (1 << 30) - 1; static const int DEF_LEFT_MASK = 1 << 30; static const int IS_LEAF_MASK = 1 << 31; - template __host__ __device__ o_t output() const { return val; } + template + __host__ __device__ o_t output() const { + return val; + } __host__ __device__ float thresh() const { return val.f; } __host__ __device__ int fid() const { return bits & FID_MASK; } __host__ __device__ bool def_left() const { return bits & DEF_LEFT_MASK; } @@ -64,8 +67,15 @@ struct base_node : dense_node_t { } }; -template<> __host__ __device__ __forceinline__ float 
base_node::output() const { return val.f; } -template<> __host__ __device__ __forceinline__ unsigned base_node::output() const { return val.idx; } +template <> +__host__ __device__ __forceinline__ float base_node::output() const { + return val.f; +} +template <> +__host__ __device__ __forceinline__ unsigned base_node::output() + const { + return val.idx; +} /** dense_node is a single node of a dense forest */ struct alignas(8) dense_node : base_node { diff --git a/cpp/src/fil/fil.cu b/cpp/src/fil/fil.cu index bd0387fd4d..ed0c7286c2 100644 --- a/cpp/src/fil/fil.cu +++ b/cpp/src/fil/fil.cu @@ -118,7 +118,8 @@ struct forest { threshold_ = params->threshold; global_bias_ = params->global_bias; leaf_payload_type_ = params->leaf_payload_type; - printf("@forest init_common leaf_payload_type_ == %d\n", leaf_payload_type_); + printf("@forest init_common leaf_payload_type_ == %d\n", + leaf_payload_type_); num_output_classes_ = params->num_classes; init_max_shm(); } @@ -135,37 +136,50 @@ struct forest { params.data = data; params.num_rows = num_rows; params.max_shm = max_shm_; - params.num_output_classes = (predict_proba || leaf_payload_type_ == INT_CLASS_LABEL) ? num_output_classes_ : 1; - // FLOAT_SCALAR means inference produces 1 class score/component and - // transform_k might complement to 2 for classification, - // if class probabilities are being requested - // assuming predict(..., predict_proba=true) will not get called - // for regression, hence forest::num_output_classes_ == 2 + params.num_output_classes = + (predict_proba || leaf_payload_type_ == INT_CLASS_LABEL) + ? 
num_output_classes_ + : 1; + // FLOAT_SCALAR means inference produces 1 class score/component and + // transform_k might complement to 2 for classification, + // if class probabilities are being requested + // assuming predict(..., predict_proba=true) will not get called + // for regression, hence forest::num_output_classes_ == 2 params.predict_proba = predict_proba; - printf("predict_proba = %s, forest::num_output_classes_ = %d, predict_params.num_output_classes = %d\n", predict_proba ? "true" : "false", num_output_classes_, params.num_output_classes); + printf( + "predict_proba = %s, forest::num_output_classes_ = %d, " + "predict_params.num_output_classes = %d\n", + predict_proba ? "true" : "false", num_output_classes_, + params.num_output_classes); params.leaf_payload_type = leaf_payload_type_; params.predict_proba = predict_proba; - ASSERT(output_ & (output_t::THRESHOLD | output_t::SIGMOID) || leaf_payload_type_ == INT_CLASS_LABEL || !predict_proba, "predict_proba does not make sense for regression"); - + ASSERT(output_ & (output_t::THRESHOLD | output_t::SIGMOID) || + leaf_payload_type_ == INT_CLASS_LABEL || !predict_proba, + "predict_proba does not make sense for regression"); + // Predict using the forest. cudaStream_t stream = h.getStream(); infer(params, stream); // Transform the output if necessary. output_t ot = output_; - if(leaf_payload_type_ == INT_CLASS_LABEL && !predict_proba) - ot = output_t(ot & ~output_t::AVG); // don't "average" class labels + if (leaf_payload_type_ == INT_CLASS_LABEL && !predict_proba) + ot = output_t(ot & ~output_t::AVG); // don't "average" class labels if (ot != output_t::RAW || global_bias_ != 0.0f || predict_proba) { bool complement_proba = predict_proba && leaf_payload_type_ == FLOAT_SCALAR; - unsigned long values_to_transform = predict_proba ? - (unsigned long) num_rows * (unsigned long) num_output_classes_ : num_rows; + unsigned long values_to_transform = + predict_proba + ? 
(unsigned long)num_rows * (unsigned long)num_output_classes_ + : num_rows; printf("global_bias = %f\n", global_bias_); - transform_k<<>>( - preds, values_to_transform, ot, num_trees_ > 0 ? (1.0f / num_trees_) : 1.0f, - threshold_, global_bias_, predict_proba, complement_proba); + transform_k<<>>(preds, values_to_transform, ot, + num_trees_ > 0 ? (1.0f / num_trees_) : 1.0f, + threshold_, global_bias_, predict_proba, + complement_proba); CUDA_CHECK(cudaPeekAtLastError()); } } @@ -313,11 +327,20 @@ void check_params(const forest_params_t* params, bool dense) { ASSERT(false, "output should be a combination of RAW, AVG, SIGMOID and THRESHOLD"); } - ASSERT(params->output & output_t::THRESHOLD || params->num_classes == 1 || params->leaf_payload_type == INT_CLASS_LABEL, "cannot do two-component regression using FLOAT_SCALAR leaf_payload_type"); - ASSERT(params->num_classes != 1 || !(params->output & output_t::THRESHOLD), "single-class classification does not make sense"); - ASSERT(params->leaf_payload_type != INT_CLASS_LABEL || params->output & output_t::AVG, "need averaging to turn multi-class votes into probabilities"); - ASSERT(params->leaf_payload_type != INT_CLASS_LABEL || !(params->output & output_t::SIGMOID), "SIGMOID does not make sense for class-vote-based classification"); - ASSERT(params->leaf_payload_type != INT_CLASS_LABEL || !params->global_bias, "global_bias does not make sense for class-vote-based classification"); + ASSERT( + params->output & output_t::THRESHOLD || params->num_classes == 1 || + params->leaf_payload_type == INT_CLASS_LABEL, + "cannot do two-component regression using FLOAT_SCALAR leaf_payload_type"); + ASSERT(params->num_classes != 1 || !(params->output & output_t::THRESHOLD), + "single-class classification does not make sense"); + ASSERT(params->leaf_payload_type != INT_CLASS_LABEL || + params->output & output_t::AVG, + "need averaging to turn multi-class votes into probabilities"); + ASSERT(params->leaf_payload_type != INT_CLASS_LABEL || 
+ !(params->output & output_t::SIGMOID), + "SIGMOID does not make sense for class-vote-based classification"); + ASSERT(params->leaf_payload_type != INT_CLASS_LABEL || !params->global_bias, + "global_bias does not make sense for class-vote-based classification"); } // tl_node_at is a checked version of tree[i] @@ -405,7 +428,8 @@ int find_class_label_from_one_hot(tl::tl_float* vector, int len) { out = i; found_label = true; } else - ASSERT(vector[i] == 0., "label vector contains values other than 0. and 1."); + ASSERT(vector[i] == 0., + "label vector contains values other than 0. and 1."); return out; } @@ -518,9 +542,9 @@ void tl2fil_common(forest_params_t* params, const tl::Model& model, // assuming either all leaves use the .leaf_vector() or all leaves use .leaf_value() auto tree = model.trees[0]; int node_key; - for(node_key = tree_root(tree); - !tl_node_at(tree, node_key).is_leaf(); - node_key = tl_node_at(tree, node_key).cleft()); + for (node_key = tree_root(tree); !tl_node_at(tree, node_key).is_leaf(); + node_key = tl_node_at(tree, node_key).cleft()) + ; auto vec = tl_node_at(tree, node_key).leaf_vector(); if (vec.size()) { params->num_classes = vec.size(); @@ -528,6 +552,7 @@ void tl2fil_common(forest_params_t* params, const tl::Model& model, params->leaf_payload_type = INT_CLASS_LABEL; printf("detected %lu-class classification model \n", vec.size()); } else { + printf("scalar leaves\n"); params->leaf_payload_type = FLOAT_SCALAR; params->num_classes = tl_params->output_class ? 
2 : 1; } diff --git a/cpp/src/fil/infer.cu b/cpp/src/fil/infer.cu index 0f3503e1d0..5628c13830 100644 --- a/cpp/src/fil/infer.cu +++ b/cpp/src/fil/infer.cu @@ -44,8 +44,6 @@ struct vec { } }; -#define __forceinline__ - template __device__ __forceinline__ vec infer_one_tree( tree_type tree, float* sdata, int cols) { @@ -68,7 +66,8 @@ __device__ __forceinline__ vec infer_one_tree( } while (mask != 0); vec out; #pragma unroll - for (int j = 0; j < NITEMS; ++j) out[j] = tree[curr[j]].base_node::output(); + for (int j = 0; j < NITEMS; ++j) + out[j] = tree[curr[j]].base_node::output(); return out; } @@ -96,12 +95,12 @@ struct tree_aggregator_t { int num_output_classes; __device__ __forceinline__ tree_aggregator_t(int num_output_classes_, void*) - : num_output_classes(num_output_classes_) { - } + : num_output_classes(num_output_classes_) {} __device__ __forceinline__ void accumulate(vec out) { acc += out; } - __device__ __forceinline__ void finalize(float* out, int num_rows, char output_stride) { + __device__ __forceinline__ void finalize(float* out, int num_rows, + char output_stride) { __syncthreads(); using BlockReduce = cub::BlockReduce, FIL_TPB>; __shared__ typename BlockReduce::TempStorage tmp_storage; @@ -113,10 +112,12 @@ struct tree_aggregator_t { } } } - __device__ __forceinline__ void finalize_regression(float* out, int num_rows) { + __device__ __forceinline__ void finalize_regression(float* out, + int num_rows) { finalize(out, num_rows, 1); } - __device__ __forceinline__ void finalize_class_proba(float* out, int num_rows) { + __device__ __forceinline__ void finalize_class_proba(float* out, + int num_rows) { finalize(out, num_rows, 2); } __device__ __forceinline__ void finalize_class_label(float* out, @@ -145,13 +146,14 @@ struct tree_aggregator_t { } __device__ __forceinline__ void accumulate(vec out) { #pragma unroll - for (int i = 0; i < NITEMS; ++i) - atomicAdd(votes + out[i] * NITEMS + i, 1); + for (int i = 0; i < NITEMS; ++i) atomicAdd(votes + out[i] * 
NITEMS + i, 1); } - __device__ __forceinline__ void finalize_regression(float* out, int num_rows) { + __device__ __forceinline__ void finalize_regression(float* out, + int num_rows) { asm("trap;"); } - __device__ __forceinline__ void finalize_class_proba(float* out, int num_rows) { + __device__ __forceinline__ void finalize_class_proba(float* out, + int num_rows) { __syncthreads(); int item = threadIdx.x; int row = blockIdx.x * NITEMS + item; @@ -203,15 +205,16 @@ __global__ void infer_k(storage_type forest, predict_params params) { AggregateTrees acc(params.num_output_classes, nullptr); // one block works on NITEMS rows and the whole forest for (int j = threadIdx.x; j < forest.num_trees(); j += blockDim.x) { - acc.accumulate( - infer_one_tree::T> - (forest[j], sdata, params.num_cols)); + acc.accumulate(infer_one_tree::T>( + forest[j], sdata, params.num_cols)); } // compute most probable class. in cuML RF, output is class label, // hence, no-predicted class edge case doesn't apply - if(false && !threadIdx.x && !blockIdx.x) { - printf("%s\n", params.predict_proba ? "finalize_class_proba" : - (params.num_output_classes > 1 ? "finalize_class_label" : "finalize_regression")); + if (false && !threadIdx.x && !blockIdx.x) { + printf("%s\n", params.predict_proba + ? "finalize_class_proba" + : (params.num_output_classes > 1 ? "finalize_class_label" + : "finalize_regression")); } if (!params.predict_proba) { if (params.num_output_classes > 1) @@ -230,21 +233,26 @@ void infer_k_launcher(storage_type forest, predict_params params, params.algo == algo_t::BATCH_TREE_REORG ? MAX_BATCH_ITEMS : 1; int shared_mem_per_item = sizeof(float) * params.num_cols + - // class vote histogram, while inferring trees - (leaf_payload_type == INT_CLASS_LABEL ? sizeof(int) * params.num_output_classes : 0); - // CUB workspace should fit itself, and we don't need - // the row by the time CUB is used + // class vote histogram, while inferring trees + (leaf_payload_type == INT_CLASS_LABEL + ? 
sizeof(int) * params.num_output_classes + : 0); + // CUB workspace should fit itself, and we don't need + // the row by the time CUB is used int num_items = params.max_shm / shared_mem_per_item; if (num_items == 0) { int max_cols = params.max_shm / sizeof(float); ASSERT(false, "p.num_cols == %d: too many features, only %d allowed%s", - params.num_cols, max_cols, leaf_payload_type == INT_CLASS_LABEL ? - "(accounting for shared class vote histogram)" : ""); + params.num_cols, max_cols, + leaf_payload_type == INT_CLASS_LABEL + ? "(accounting for shared class vote histogram)" + : ""); } num_items = std::min(num_items, params.max_items); int num_blocks = ceildiv(int(params.num_rows), num_items); int shm_sz = num_items * shared_mem_per_item; - std::cout << "num_items " << num_items << " num_blocks " << num_blocks << " shm_sz " << shm_sz << "\n"; + std::cout << "num_items " << num_items << " num_blocks " << num_blocks + << " shm_sz " << shm_sz << "\n"; switch (num_items) { case 1: infer_k<1, leaf_payload_type> @@ -275,12 +283,10 @@ void infer(storage_type forest, predict_params params, cudaStream_t stream) { case FLOAT_SCALAR: ASSERT(params.num_output_classes <= 2, "wrong leaf payload for multi-class (>2) inference"); - infer_k_launcher(forest, params, - stream); + infer_k_launcher(forest, params, stream); break; case INT_CLASS_LABEL: - infer_k_launcher( - forest, params, stream); + infer_k_launcher(forest, params, stream); break; default: ASSERT(false, "unknown leaf_payload_type"); diff --git a/cpp/src/randomforest/randomforest.cu b/cpp/src/randomforest/randomforest.cu index c5c3aff470..5a4f58cb3a 100644 --- a/cpp/src/randomforest/randomforest.cu +++ b/cpp/src/randomforest/randomforest.cu @@ -273,6 +273,25 @@ void print_rf_detailed(const RandomForestMetaData* forest) { } } +// tl_node_at is a checked version of tree[i] +const tl::Tree::Node& tl_node_at(const tl::Tree& tree, size_t i) { + ASSERT(i < tree.num_nodes, "node index out of range"); + return tree[i]; +} + +int 
tree_root(const tl::Tree& tree) { + // find the root + int root = -1; + for (int i = 0; i < tree.num_nodes; ++i) { + if (tl_node_at(tree, i).is_root()) { + ASSERT(root == -1, "multi-root trees not supported"); + root = i; + } + } + ASSERT(root != -1, "a tree must have a root"); + return root; +} + template void build_treelite_forest(ModelHandle* model, const RandomForestMetaData* forest, @@ -322,6 +341,14 @@ void build_treelite_forest(ModelHandle* model, } TREELITE_CHECK(TreeliteModelBuilderCommitModel(model_builder, model)); + + auto tree = model->trees[0]; + int node_key; + for (node_key = tree_root(tree); !tl_node_at(tree, node_key).is_leaf(); + node_key = tl_node_at(tree, node_key).cleft()) + ; + auto vec = tl_node_at(tree, node_key).leaf_vector(); + printf("Built a tree with 0th tree first leaf vec size %d\n", vec.size()); TREELITE_CHECK(TreeliteDeleteModelBuilder(model_builder)); } } diff --git a/cpp/test/sg/fil_test.cu b/cpp/test/sg/fil_test.cu index 0da0afb3c9..28646d97a3 100644 --- a/cpp/test/sg/fil_test.cu +++ b/cpp/test/sg/fil_test.cu @@ -38,7 +38,6 @@ using namespace MLCommon; namespace tl = treelite; namespace tlf = treelite::frontend; - struct FilTestParams { // input data parameters int num_rows; @@ -67,15 +66,11 @@ struct FilTestParams { }; std::string output2str(fil::output_t output) { - if(output==fil::RAW) - return "RAW"; + if (output == fil::RAW) return "RAW"; std::string s = ""; - if(output & fil::AVG) - s += "| AVG"; - if(output & fil::THRESHOLD) - s += "| THRESHOLD"; - if(output & fil::SIGMOID) - s += "| SIGMOID"; + if (output & fil::AVG) s += "| AVG"; + if (output & fil::THRESHOLD) s += "| THRESHOLD"; + if (output & fil::SIGMOID) s += "| SIGMOID"; return s; } @@ -83,10 +78,10 @@ std::ostream& operator<<(std::ostream& os, const FilTestParams& ps) { os << "num_rows = " << ps.num_rows << ", num_cols = " << ps.num_cols << ", nan_prob = " << ps.nan_prob << ", depth = " << ps.depth << ", num_trees = " << ps.num_trees << ", leaf_prob = " << 
ps.leaf_prob - << ", output = " << output2str(ps.output) << ", threshold = " << ps.threshold - << ", algo = " << ps.algo << ", seed = " << ps.seed - << ", tolerance = " << ps.tolerance << ", op = " << tl::OpName(ps.op) - << ", global_bias = " << ps.global_bias + << ", output = " << output2str(ps.output) + << ", threshold = " << ps.threshold << ", algo = " << ps.algo + << ", seed = " << ps.seed << ", tolerance = " << ps.tolerance + << ", op = " << tl::OpName(ps.op) << ", global_bias = " << ps.global_bias << ", leaf_payload_type = " << ps.leaf_payload_type << ", num_classes = " << ps.num_classes; return os; @@ -103,8 +98,7 @@ float sigmoid(float x) { return 1.0f / (1.0f + expf(-x)); } typedef std::vector vote_vec; vote_vec& operator+=(vote_vec& a, vote_vec b) { ASSERT(a.size() == b.size(), "trying to add two vectors of different size"); - for(int i=0; i < a.size(); ++i) - a[i] += b[i]; + for (int i = 0; i < a.size(); ++i) a[i] += b[i]; return a; } @@ -188,8 +182,10 @@ class BaseFilTest : public testing::TestWithParam { fil::val_t w; switch (ps.leaf_payload_type) { case fil::leaf_value_t::INT_CLASS_LABEL: - w.idx = (int)((weights_h[i]*.5 + .5) // [0., 1.] - * ps.num_classes + .5) % ps.num_classes; // [0..num_classes] + w.idx = (int)((weights_h[i] * .5 + .5) // [0., 1.] 
+ * ps.num_classes + + .5) % + ps.num_classes; // [0..num_classes] break; case fil::leaf_value_t::FLOAT_SCALAR: w.f = weights_h[i]; @@ -238,17 +234,19 @@ class BaseFilTest : public testing::TestWithParam { std::vector want_preds_h(ps.num_rows); std::vector want_proba_h(ps.num_rows * ps.num_classes); int num_nodes = tree_num_nodes(); - switch(ps.leaf_payload_type) { + switch (ps.leaf_payload_type) { case fil::leaf_value_t::FLOAT_SCALAR: for (int i = 0; i < ps.num_rows; ++i) { float pred = 0.0f; for (int j = 0; j < ps.num_trees; ++j) { - pred += infer_one_tree(&nodes[j * num_nodes], &data_h[i * ps.num_cols]).f; + pred += + infer_one_tree(&nodes[j * num_nodes], &data_h[i * ps.num_cols]).f; } - if ((ps.output & fil::output_t::AVG) != 0) pred = pred * (1.f / ps.num_trees); + if ((ps.output & fil::output_t::AVG) != 0) + pred = pred * (1.f / ps.num_trees); pred += ps.global_bias; if ((ps.output & fil::output_t::SIGMOID) != 0) pred = sigmoid(pred); - if(ps.num_classes == 2) { + if (ps.num_classes == 2) { want_proba_h[i * 2] = 1.f - pred; want_proba_h[i * 2 + 1] = pred; } @@ -261,15 +259,18 @@ class BaseFilTest : public testing::TestWithParam { case fil::leaf_value_t::INT_CLASS_LABEL: std::vector class_votes(ps.num_classes); for (int r = 0; r < ps.num_rows; ++r) { - for(auto& v: class_votes) v = 0; + for (auto& v : class_votes) v = 0; for (int j = 0; j < ps.num_trees; ++j) { - unsigned class_label = infer_one_tree(&nodes[j * num_nodes], &data_h[r * ps.num_cols]).idx; + unsigned class_label = + infer_one_tree(&nodes[j * num_nodes], &data_h[r * ps.num_cols]) + .idx; ++class_votes[class_label]; } unsigned best_class = 0, most_votes = 0; - for(int c = 0; c < ps.num_classes; ++c) { + for (int c = 0; c < ps.num_classes; ++c) { unsigned votes = class_votes[c]; - want_proba_h[r * ps.num_classes + c] = votes * (1.0f / ps.num_trees); + want_proba_h[r * ps.num_classes + c] = + votes * (1.0f / ps.num_trees); if (votes > most_votes) { most_votes = votes; best_class = c; @@ -285,7 +286,8 
@@ class BaseFilTest : public testing::TestWithParam { updateDevice(want_preds_d, want_preds_h.data(), ps.num_rows, stream); if (ps.num_classes >= 2) { allocate(want_proba_d, ps.num_rows * ps.num_classes); - updateDevice(want_proba_d, want_proba_h.data(), ps.num_rows * ps.num_classes, stream); + updateDevice(want_proba_d, want_proba_h.data(), + ps.num_rows * ps.num_classes, stream); } CUDA_CHECK(cudaStreamSynchronize(stream)); } @@ -311,14 +313,17 @@ class BaseFilTest : public testing::TestWithParam { void compare() { if (ps.num_classes >= 2) { - ASSERT_TRUE(devArrMatch(want_proba_d, proba_d, ps.num_rows * ps.num_classes, - CompareApprox(ps.tolerance), stream)); + ASSERT_TRUE(devArrMatch(want_proba_d, proba_d, + ps.num_rows * ps.num_classes, + CompareApprox(ps.tolerance), stream)); } - float tolerance = ps.leaf_payload_type == fil::leaf_value_t::FLOAT_SCALAR ? ps.tolerance : std::numeric_limits::epsilon(); + float tolerance = ps.leaf_payload_type == fil::leaf_value_t::FLOAT_SCALAR + ? ps.tolerance + : std::numeric_limits::epsilon(); // in multi-class prediction, floats represent the most likely class // and would be generated by converting an int to float ASSERT_TRUE(devArrMatch(want_preds_d, preds_d, ps.num_rows, - CompareApprox(tolerance), stream)); + CompareApprox(tolerance), stream)); } fil::val_t infer_one_tree(fil::dense_node_t* root, float* data) { @@ -502,11 +507,18 @@ class TreeliteFilTest : public BaseFilTest { void init_forest_impl(fil::forest_t* pforest, fil::storage_type_t storage_type) { bool random_forest_flag = (ps.output & fil::output_t::AVG) != 0; - printf("%s && %s == %s\n", (ps.output & fil::output_t::AVG) ? "at" : "af", (ps.leaf_payload_type == fil::leaf_value_t::INT_CLASS_LABEL) ? "pt" : "pf", random_forest_flag ? "ft" : "ff"); - int treelite_num_classes = ps.leaf_payload_type == fil::leaf_value_t::FLOAT_SCALAR ? 
1 : ps.num_classes; - std::unique_ptr model_builder( - new tlf::ModelBuilder(ps.num_cols, treelite_num_classes, random_forest_flag)); - printf("ModelBuilder(num_cols, num_classes = %d, random_forest_flag = %s)\n", treelite_num_classes, random_forest_flag ? "true" : "false"); + printf("%s && %s == %s\n", (ps.output & fil::output_t::AVG) ? "at" : "af", + (ps.leaf_payload_type == fil::leaf_value_t::INT_CLASS_LABEL) ? "pt" + : "pf", + random_forest_flag ? "ft" : "ff"); + int treelite_num_classes = + ps.leaf_payload_type == fil::leaf_value_t::FLOAT_SCALAR ? 1 + : ps.num_classes; + std::unique_ptr model_builder(new tlf::ModelBuilder( + ps.num_cols, treelite_num_classes, random_forest_flag)); + printf( + "ModelBuilder(num_cols, num_classes = %d, random_forest_flag = %s)\n", + treelite_num_classes, random_forest_flag ? "true" : "false"); // prediction transform if ((ps.output & fil::output_t::SIGMOID) != 0) { @@ -572,68 +584,68 @@ class TreeliteAutoFilTest : public TreeliteFilTest { std::vector predict_dense_inputs = { {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_t::FLOAT_SCALAR, 1}, // 00 + fil::leaf_value_t::FLOAT_SCALAR, 1}, // 00 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0, fil::algo_t::NAIVE, - 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR, 1}, // 01 + 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR, 1}, // 01 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0, fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_t::FLOAT_SCALAR, 1}, // 02 + fil::leaf_value_t::FLOAT_SCALAR, 1}, // 02 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0, fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_t::FLOAT_SCALAR, 1}, // 03 + fil::leaf_value_t::FLOAT_SCALAR, 1}, // 03 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kEQ, - 
fil::leaf_value_t::FLOAT_SCALAR, 1}, // 04 + fil::leaf_value_t::FLOAT_SCALAR, 1}, // 04 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_t::FLOAT_SCALAR, 1}, // 05 + fil::leaf_value_t::FLOAT_SCALAR, 1}, // 05 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_t::FLOAT_SCALAR, 1}, // 06 + fil::leaf_value_t::FLOAT_SCALAR, 1}, // 06 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::SIGMOID | fil::output_t::THRESHOLD), 0, 0, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_t::FLOAT_SCALAR, 2}, // 07 + fil::leaf_value_t::FLOAT_SCALAR, 2}, // 07 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::SIGMOID | fil::output_t::THRESHOLD), 0, 0, fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_t::FLOAT_SCALAR, 2}, // 08 + fil::leaf_value_t::FLOAT_SCALAR, 2}, // 08 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::SIGMOID | fil::output_t::THRESHOLD), 0, 0, fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_t::FLOAT_SCALAR, 2}, // 09 + fil::leaf_value_t::FLOAT_SCALAR, 2}, // 09 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::NAIVE, - 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR, 1}, // + 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR, 1}, // {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_t::FLOAT_SCALAR, 1}, // 10 + fil::leaf_value_t::FLOAT_SCALAR, 1}, // 10 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_t::FLOAT_SCALAR, 1}, // 11 + fil::leaf_value_t::FLOAT_SCALAR, 1}, // 11 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::THRESHOLD), 0, 0, 
fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_t::FLOAT_SCALAR, 2}, // 12 + fil::leaf_value_t::FLOAT_SCALAR, 2}, // 12 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::THRESHOLD), 0, 0, fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_t::FLOAT_SCALAR, 2}, // 13 + fil::leaf_value_t::FLOAT_SCALAR, 2}, // 13 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::THRESHOLD), 0, 0, fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_t::FLOAT_SCALAR, 2}, // 14 + fil::leaf_value_t::FLOAT_SCALAR, 2}, // 14 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0.5, fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_t::FLOAT_SCALAR, 1}, // 15 + fil::leaf_value_t::FLOAT_SCALAR, 1}, // 15 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0.5, fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_t::FLOAT_SCALAR, 1}, // 16 + fil::leaf_value_t::FLOAT_SCALAR, 1}, // 16 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0.5, fil::algo_t::NAIVE, - 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR, 1}, // + 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR, 1}, // {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::THRESHOLD), 1.0, 0.5, fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_t::FLOAT_SCALAR, 2}, // 17 + fil::leaf_value_t::FLOAT_SCALAR, 2}, // 17 }; TEST_P(PredictDenseFilTest, Predict) { compare(); } @@ -645,35 +657,35 @@ INSTANTIATE_TEST_CASE_P(FilTests, PredictDenseFilTest, // global_bias, algo, seed, tolerance std::vector predict_sparse_inputs = { {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0, fil::algo_t::NAIVE, - 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR, 1}, // + 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR, 1}, // {20000, 50, 0.05, 8, 50, 0.05, 
fil::output_t::SIGMOID, 0, 0, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_t::FLOAT_SCALAR, 1}, // 00 + fil::leaf_value_t::FLOAT_SCALAR, 1}, // 00 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::SIGMOID | fil::output_t::THRESHOLD), 0, 0, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_t::FLOAT_SCALAR, 2}, // 01 + fil::leaf_value_t::FLOAT_SCALAR, 2}, // 01 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::NAIVE, - 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR, 1}, // + 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR, 1}, // {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::THRESHOLD), 0, 0.5, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_t::FLOAT_SCALAR, 2}, // 02 + fil::leaf_value_t::FLOAT_SCALAR, 2}, // 02 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0.5, fil::algo_t::NAIVE, - 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR, 1}, // + 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR, 1}, // {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0.5, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_t::FLOAT_SCALAR, 1}, // 03 + fil::leaf_value_t::FLOAT_SCALAR, 1}, // 03 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0.5, fil::algo_t::NAIVE, - 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR, 1}, // + 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR, 1}, // {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::THRESHOLD), 1.0, 0.5, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_t::FLOAT_SCALAR, 2}, // 04 + fil::leaf_value_t::FLOAT_SCALAR, 2}, // 04 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::SIGMOID | fil::output_t::THRESHOLD), 0, 0, fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_t::FLOAT_SCALAR, 2}, // 05 + 
fil::leaf_value_t::FLOAT_SCALAR, 2}, // 05 }; TEST_P(PredictSparseFilTest, Predict) { compare(); } @@ -684,105 +696,98 @@ INSTANTIATE_TEST_CASE_P(FilTests, PredictSparseFilTest, // rows, cols, nan_prob, depth, num_trees, leaf_prob, output, threshold, // global_bias, algo, seed, tolerance std::vector import_dense_inputs = { - {20000, 50, 0.05, 8, 50, 0.05, - fil::output_t::AVG, 0, 0, + {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator::kLE, - fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 00 + fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 00 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0, fil::algo_t::NAIVE, - 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::FLOAT_SCALAR, 1}, // 01 - {20000, 50, 0.05, 8, 50, 0.05, - fil::output_t::AVG, 0, 0, - fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kLE, - fil::leaf_value_t::INT_CLASS_LABEL, 4}, // 02 + 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::FLOAT_SCALAR, 1}, // 01 + {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::NAIVE, + 42, 2e-3f, tl::Operator::kLE, fil::leaf_value_t::INT_CLASS_LABEL, 4}, // 02 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::SIGMOID | fil::output_t::THRESHOLD), 0, 0, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kGT, - fil::leaf_value_t::FLOAT_SCALAR, 2}, // 03 + fil::leaf_value_t::FLOAT_SCALAR, 2}, // 03 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::NAIVE, - 42, 2e-3f, tl::Operator::kGE, fil::leaf_value_t::INT_CLASS_LABEL, 2}, // + 42, 2e-3f, tl::Operator::kGE, fil::leaf_value_t::INT_CLASS_LABEL, 2}, // {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::THRESHOLD), 0, 0, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kLT, - fil::leaf_value_t::FLOAT_SCALAR, 2}, // 04 + fil::leaf_value_t::FLOAT_SCALAR, 2}, // 04 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kLE, - fil::leaf_value_t::INT_CLASS_LABEL, 
2}, // 05 - {20000, 50, 0.05, 8, 50, 0.05, - fil::output_t::AVG, 0, 0, + fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 05 + {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kGT, - fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 06 + fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 06 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::THRESHOLD | fil::output_t::AVG), 0, 0, fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kGE, - fil::leaf_value_t::INT_CLASS_LABEL, 2}, // + fil::leaf_value_t::INT_CLASS_LABEL, 2}, // {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kLT, - fil::leaf_value_t::FLOAT_SCALAR, 1}, // 07 + fil::leaf_value_t::FLOAT_SCALAR, 1}, // 07 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::THRESHOLD), 0, 0, fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kLE, - fil::leaf_value_t::INT_CLASS_LABEL, 3}, // 08 + fil::leaf_value_t::INT_CLASS_LABEL, 3}, // 08 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0, fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLT, - fil::leaf_value_t::FLOAT_SCALAR, 1}, // 09 - {20000, 50, 0.05, 8, 50, 0.05, - fil::output_t::AVG, 0, 0, + fil::leaf_value_t::FLOAT_SCALAR, 1}, // 09 + {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLT, - fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 10 + fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 10 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLE, - fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 11 + fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 11 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLE, - fil::leaf_value_t::FLOAT_SCALAR, 1}, // 12 + fil::leaf_value_t::FLOAT_SCALAR, 1}, // 12 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0, 
fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kGT, - fil::leaf_value_t::FLOAT_SCALAR, 1}, // 13 - {20000, 50, 0.05, 8, 50, 0.05, - fil::output_t::AVG, 0, 0, + fil::leaf_value_t::FLOAT_SCALAR, 1}, // 13 + {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kGT, - fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 14 + fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 14 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kGE, - fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 15 + fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 15 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kGE, - fil::leaf_value_t::FLOAT_SCALAR, 1}, // 16 + fil::leaf_value_t::FLOAT_SCALAR, 1}, // 16 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::THRESHOLD | fil::output_t::AVG), 0, 0, fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLT, - fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 17 + fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 17 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::THRESHOLD | fil::output_t::AVG), 0, 0, fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLE, - fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 18 + fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 18 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLT, - fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 19 + fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 19 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLE, - fil::leaf_value_t::FLOAT_SCALAR, 1}, // 20 + fil::leaf_value_t::FLOAT_SCALAR, 1}, // 20 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::THRESHOLD), 0, 0, fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kGT, - fil::leaf_value_t::FLOAT_SCALAR, 2}, // 21 + 
fil::leaf_value_t::FLOAT_SCALAR, 2}, // 21 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::THRESHOLD), 0, 0, fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kGE, - fil::leaf_value_t::FLOAT_SCALAR, 2}, // 22 + fil::leaf_value_t::FLOAT_SCALAR, 2}, // 22 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kLT, - fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 23 - {20000, 50, 0.05, 8, 50, 0.05, - fil::output_t::AVG, 0, 0, + fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 23 + {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLE, - fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 24 + fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 24 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::NAIVE, - 42, 2e-3f, tl::Operator::kGT, fil::leaf_value_t::INT_CLASS_LABEL, 3}, // 25 + 42, 2e-3f, tl::Operator::kGT, fil::leaf_value_t::INT_CLASS_LABEL, 3}, // 25 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::THRESHOLD), 1.0, 0.5, fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kGE, - fil::leaf_value_t::FLOAT_SCALAR, 2}, // 26 + fil::leaf_value_t::FLOAT_SCALAR, 2}, // 26 }; TEST_P(TreeliteDenseFilTest, Import) { compare(); } @@ -795,33 +800,33 @@ INSTANTIATE_TEST_CASE_P(FilTests, TreeliteDenseFilTest, std::vector import_sparse_inputs = { {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator::kLT, - fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 00 + fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 00 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::NAIVE, - 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 01 + 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 01 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kLE, - 
fil::leaf_value_t::FLOAT_SCALAR, 1}, // 02 + fil::leaf_value_t::FLOAT_SCALAR, 1}, // 02 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::SIGMOID | fil::output_t::THRESHOLD), 0, 0, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kGT, - fil::leaf_value_t::FLOAT_SCALAR, 2}, // 03 + fil::leaf_value_t::FLOAT_SCALAR, 2}, // 03 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::NAIVE, - 42, 2e-3f, tl::Operator::kGE, fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 04 + 42, 2e-3f, tl::Operator::kGE, fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 04 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::THRESHOLD), 0, 0, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kLT, - fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 05 + fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 05 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::NAIVE, - 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 06 + 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 06 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0.5, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kLE, - fil::leaf_value_t::FLOAT_SCALAR, 1}, // 07 + fil::leaf_value_t::FLOAT_SCALAR, 1}, // 07 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::NAIVE, - 42, 2e-3f, tl::Operator::kGT, fil::leaf_value_t::INT_CLASS_LABEL, 3}, // 08 + 42, 2e-3f, tl::Operator::kGT, fil::leaf_value_t::INT_CLASS_LABEL, 3}, // 08 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::THRESHOLD), 1.0, 0.5, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kGE, - fil::leaf_value_t::FLOAT_SCALAR, 2}, // 09 + fil::leaf_value_t::FLOAT_SCALAR, 2}, // 09 }; TEST_P(TreeliteSparseFilTest, Import) { compare(); } @@ -834,16 +839,16 @@ INSTANTIATE_TEST_CASE_P(FilTests, TreeliteSparseFilTest, std::vector import_auto_inputs = { {20000, 50, 0.05, 10, 50, 0.05, fil::output_t::RAW, 0, 0, fil::algo_t::ALGO_AUTO, 42, 2e-3f, 
tl::Operator::kLT, - fil::leaf_value_t::FLOAT_SCALAR, 1}, // 00 + fil::leaf_value_t::FLOAT_SCALAR, 1}, // 00 {20000, 50, 0.05, 15, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator::kLT, - fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 01 + fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 01 {20000, 50, 0.05, 19, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator::kLT, - fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 02 + fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 02 {20000, 50, 0.05, 19, 50, 0.05, fil::output_t::RAW, 0, 0, fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLT, - fil::leaf_value_t::FLOAT_SCALAR, 1}, // 03 + fil::leaf_value_t::FLOAT_SCALAR, 1}, // 03 }; TEST_P(TreeliteAutoFilTest, Import) { compare(); } diff --git a/python/cuml/test/test_fil.py b/python/cuml/test/test_fil.py index 14aca684d5..657db930df 100644 --- a/python/cuml/test/test_fil.py +++ b/python/cuml/test/test_fil.py @@ -302,7 +302,8 @@ def test_fil_skl_regression(n_rows, n_columns, n_estimators, max_depth, fil_preds = np.asarray(fm.predict(X_validation)) fil_mse = mean_squared_error(y_validation, fil_preds) - assert fil_mse == pytest.approx(skl_mse, 1e-4) + # if fil is better than skl, no need to fail the test + assert fil_mse <= skl_mse + 1e-4 assert array_equal(fil_preds, skl_preds) From 468cc3f7bf4366e539363888206a261f346e8ed6 Mon Sep 17 00:00:00 2001 From: Levs Dolgovs Date: Fri, 6 Mar 2020 00:06:15 -0800 Subject: [PATCH 044/330] remove stray debug rigging --- cpp/src/randomforest/randomforest.cu | 27 --------------------------- 1 file changed, 27 deletions(-) diff --git a/cpp/src/randomforest/randomforest.cu b/cpp/src/randomforest/randomforest.cu index 5a4f58cb3a..c5c3aff470 100644 --- a/cpp/src/randomforest/randomforest.cu +++ b/cpp/src/randomforest/randomforest.cu @@ -273,25 +273,6 @@ void print_rf_detailed(const RandomForestMetaData* forest) { } } -// tl_node_at is a checked version of tree[i] -const tl::Tree::Node& 
tl_node_at(const tl::Tree& tree, size_t i) { - ASSERT(i < tree.num_nodes, "node index out of range"); - return tree[i]; -} - -int tree_root(const tl::Tree& tree) { - // find the root - int root = -1; - for (int i = 0; i < tree.num_nodes; ++i) { - if (tl_node_at(tree, i).is_root()) { - ASSERT(root == -1, "multi-root trees not supported"); - root = i; - } - } - ASSERT(root != -1, "a tree must have a root"); - return root; -} - template void build_treelite_forest(ModelHandle* model, const RandomForestMetaData* forest, @@ -341,14 +322,6 @@ void build_treelite_forest(ModelHandle* model, } TREELITE_CHECK(TreeliteModelBuilderCommitModel(model_builder, model)); - - auto tree = model->trees[0]; - int node_key; - for (node_key = tree_root(tree); !tl_node_at(tree, node_key).is_leaf(); - node_key = tl_node_at(tree, node_key).cleft()) - ; - auto vec = tl_node_at(tree, node_key).leaf_vector(); - printf("Built a tree with 0th tree first leaf vec size %d\n", vec.size()); TREELITE_CHECK(TreeliteDeleteModelBuilder(model_builder)); } } From fdf439a0f08160ec7855b33354eb002076f74f8a Mon Sep 17 00:00:00 2001 From: Levs Dolgovs Date: Fri, 6 Mar 2020 18:28:09 -0800 Subject: [PATCH 045/330] addressed review comments, passing all tests --- cpp/include/cuml/fil/fil.h | 11 +++-- cpp/src/fil/common.cuh | 21 ++++++---- cpp/src/fil/fil.cu | 86 ++++++++++++++++---------------------- cpp/src/fil/infer.cu | 83 +++++++++++++----------------------- cpp/test/sg/fil_test.cu | 52 +++++++++++------------ 5 files changed, 109 insertions(+), 144 deletions(-) diff --git a/cpp/include/cuml/fil/fil.h b/cpp/include/cuml/fil/fil.h index daa292c232..87eff78da5 100644 --- a/cpp/include/cuml/fil/fil.h +++ b/cpp/include/cuml/fil/fil.h @@ -53,7 +53,7 @@ enum algo_t { * output of the previous stage: * - one of RAW or AVG, indicating how to combine individual tree outputs into the forest output * - optional SIGMOID for applying the sigmoid transform - * - optional THRESHOLD, for thresholding for classification + * - 
optional CLASS, for thresholding for classification */ enum output_t { /** raw output: the sum of the tree outputs; use for GBM models for @@ -70,7 +70,7 @@ enum output_t { SIGMOID = 0x10, /** threshold: apply threshold to the output of the previous stage to get the class (0 or 1) */ - THRESHOLD = 0x100, + CLASS = 0x100, }; /** storage_type_t defines whether to import the forests as dense or sparse */ @@ -176,10 +176,9 @@ struct forest_params_t { // global_bias is added to the sum of tree predictions // (after averaging, if it is used, but before any further transformations) float global_bias; - // prediction_dim determines the class probability prediction shape. - // also affects intermediate output in classification - // currently, multi-valued (vector) regression not supported due to model - // storage/layout restrictions + // only used for INT_CLASS_LABEL inference. since we're storing the + // labels in leaves instead of the whole vector, this keeps track + // of total unique label/class/component count int num_classes; }; diff --git a/cpp/src/fil/common.cuh b/cpp/src/fil/common.cuh index 8a44eeab7b..cb45109cb2 100644 --- a/cpp/src/fil/common.cuh +++ b/cpp/src/fil/common.cuh @@ -57,11 +57,11 @@ struct base_node : dense_node_t { __host__ __device__ bool is_leaf() const { return bits & IS_LEAF_MASK; } base_node() = default; base_node(dense_node_t node) : dense_node_t(node) {} - base_node(val_t output_, float thresh, int fid, bool def_left, bool is_leaf) { + base_node(val_t output, float thresh, int fid, bool def_left, bool is_leaf) { bits = (fid & FID_MASK) | (def_left ? DEF_LEFT_MASK : 0) | (is_leaf ? 
IS_LEAF_MASK : 0); if (is_leaf) - val = output_; + val = output; else val.f = thresh; } @@ -81,8 +81,8 @@ __host__ __device__ __forceinline__ unsigned base_node::output() struct alignas(8) dense_node : base_node { dense_node() = default; dense_node(dense_node_t node) : base_node(node) {} - dense_node(val_t output_, float thresh, int fid, bool def_left, bool is_leaf) - : base_node(output_, thresh, fid, def_left, is_leaf) {} + dense_node(val_t output, float thresh, int fid, bool def_left, bool is_leaf) + : base_node(output, thresh, fid, def_left, is_leaf) {} /** index of the left child, where curr is the index of the current node */ __host__ __device__ int left(int curr) const { return 2 * curr + 1; } }; @@ -121,9 +121,9 @@ struct alignas(16) sparse_node : base_node, sparse_node_extra_data { //__host__ __device__ sparse_node() : left_idx(0), base_node() {} sparse_node(sparse_node_t node) : base_node(node), sparse_node_extra_data(node) {} - sparse_node(val_t output_, float thresh, int fid, bool def_left, bool is_leaf, + sparse_node(val_t output, float thresh, int fid, bool def_left, bool is_leaf, int left_index) - : base_node(output_, thresh, fid, def_left, is_leaf), + : base_node(output, thresh, fid, def_left, is_leaf), sparse_node_extra_data({.left_idx = left_index, .dummy = 0}) {} __host__ __device__ int left_index() const { return left_idx; } /** index of the left child, where curr is the index of the current node */ @@ -159,10 +159,13 @@ struct predict_params { int num_cols; algo_t algo; int max_items; // only set and used by infer() - int num_output_classes; - // TODO doc + // number of outputs for the forest for each data instance (sample) (row) + int num_outputs; + // for class probabilities, this is the number of classes considered + // ignored otherwise + int num_classes; + // leaf_payload_type determines what the leaves store (predict) leaf_value_t leaf_payload_type; - bool predict_proba; // Data parameters. 
float* preds; diff --git a/cpp/src/fil/fil.cu b/cpp/src/fil/fil.cu index ed0c7286c2..e5ff9ae9c8 100644 --- a/cpp/src/fil/fil.cu +++ b/cpp/src/fil/fil.cu @@ -71,12 +71,11 @@ __host__ __device__ float sigmoid(float x) { return 1.0f / (1.0f + expf(-x)); } /** performs additional transformations on the array of forest predictions (preds) of size n; the transformations are defined by output, and include averaging (multiplying by inv_num_trees), adding global_bias (always done), - sigmoid and applying threshold. in case of predict_proba, skips threshold - and fills in the converse probability */ + sigmoid and applying threshold. in case of complement_proba, + fills in the converse probability */ __global__ void transform_k(float* preds, size_t n, output_t output, float inv_num_trees, float threshold, - float global_bias, bool predict_proba, - bool complement_proba) { + float global_bias, bool complement_proba) { size_t i = threadIdx.x + size_t(blockIdx.x) * blockDim.x; if (i >= n) return; if (complement_proba && i % 2) return; @@ -85,9 +84,8 @@ __global__ void transform_k(float* preds, size_t n, output_t output, if ((output & output_t::AVG) != 0) result *= inv_num_trees; result += global_bias; if ((output & output_t::SIGMOID) != 0) result = sigmoid(result); - if ((output & output_t::THRESHOLD) && !predict_proba) { + if ((output & output_t::CLASS) != 0) result = result > threshold ? 
1.0f : 0.0f; - } // sklearn outputs numpy array in 'C' order, with the number of classes being last dimension // that is also the default order, so we should use the same one if (complement_proba) { @@ -120,7 +118,7 @@ struct forest { leaf_payload_type_ = params->leaf_payload_type; printf("@forest init_common leaf_payload_type_ == %d\n", leaf_payload_type_); - num_output_classes_ = params->num_classes; + num_classes_ = params->num_classes; init_max_shm(); } @@ -136,27 +134,22 @@ struct forest { params.data = data; params.num_rows = num_rows; params.max_shm = max_shm_; - params.num_output_classes = - (predict_proba || leaf_payload_type_ == INT_CLASS_LABEL) - ? num_output_classes_ - : 1; + params.num_classes = num_classes_; + params.num_outputs = + predict_proba + ? (leaf_payload_type_ == INT_CLASS_LABEL ? num_classes_ : 2) + : 1; // FLOAT_SCALAR means inference produces 1 class score/component and // transform_k might complement to 2 for classification, // if class probabilities are being requested // assuming predict(..., predict_proba=true) will not get called - // for regression, hence forest::num_output_classes_ == 2 - params.predict_proba = predict_proba; + // for regression, hence forest::num_classes == 2 printf( - "predict_proba = %s, forest::num_output_classes_ = %d, " - "predict_params.num_output_classes = %d\n", - predict_proba ? "true" : "false", num_output_classes_, - params.num_output_classes); + "predict_proba = %s, forest::num_classes = %d, " + "predict_params.num_outputs = %d\n", + predict_proba ? "true" : "false", num_classes_, + params.num_outputs); params.leaf_payload_type = leaf_payload_type_; - params.predict_proba = predict_proba; - - ASSERT(output_ & (output_t::THRESHOLD | output_t::SIGMOID) || - leaf_payload_type_ == INT_CLASS_LABEL || !predict_proba, - "predict_proba does not make sense for regression"); // Predict using the forest. 
cudaStream_t stream = h.getStream(); @@ -164,22 +157,24 @@ struct forest { // Transform the output if necessary. output_t ot = output_; + if (predict_proba) + ot = output_t(ot & ~output_t::CLASS); // no threshold on probabilities + bool complement_proba = + predict_proba && leaf_payload_type_ == FLOAT_SCALAR; + bool do_transform = ot != output_t::RAW || global_bias_ != 0.0f || complement_proba; if (leaf_payload_type_ == INT_CLASS_LABEL && !predict_proba) - ot = output_t(ot & ~output_t::AVG); // don't "average" class labels - if (ot != output_t::RAW || global_bias_ != 0.0f || predict_proba) { - bool complement_proba = - predict_proba && leaf_payload_type_ == FLOAT_SCALAR; - - unsigned long values_to_transform = - predict_proba - ? (unsigned long)num_rows * (unsigned long)num_output_classes_ - : num_rows; + // since choosing best class and all transforms are monotonic + do_transform = false; + + if (do_transform) { + + unsigned long values_to_transform = + (unsigned long) num_rows * (unsigned long) params.num_outputs; printf("global_bias = %f\n", global_bias_); transform_k<<>>(preds, values_to_transform, ot, num_trees_ > 0 ? 
(1.0f / num_trees_) : 1.0f, - threshold_, global_bias_, predict_proba, - complement_proba); + threshold_, global_bias_, complement_proba); CUDA_CHECK(cudaPeekAtLastError()); } } @@ -197,7 +192,7 @@ struct forest { float global_bias_ = 0; // init to invalid leaf_value_t leaf_payload_type_ = FLOAT_SCALAR; - int num_output_classes_ = 0; + int num_classes_ = 0; }; struct dense_forest : forest { @@ -322,25 +317,15 @@ void check_params(const forest_params_t* params, bool dense) { } // output_t::RAW == 0, and doesn't have a separate flag output_t all_set = - output_t(output_t::AVG | output_t::SIGMOID | output_t::THRESHOLD); + output_t(output_t::AVG | output_t::SIGMOID | output_t::CLASS); if ((params->output & ~all_set) != 0) { ASSERT(false, - "output should be a combination of RAW, AVG, SIGMOID and THRESHOLD"); + "output should be a combination of RAW, AVG, SIGMOID and CLASS"); } ASSERT( - params->output & output_t::THRESHOLD || params->num_classes == 1 || + params->output & output_t::CLASS || params->num_classes < 2 || params->leaf_payload_type == INT_CLASS_LABEL, "cannot do two-component regression using FLOAT_SCALAR leaf_payload_type"); - ASSERT(params->num_classes != 1 || !(params->output & output_t::THRESHOLD), - "single-class classification does not make sense"); - ASSERT(params->leaf_payload_type != INT_CLASS_LABEL || - params->output & output_t::AVG, - "need averaging to turn multi-class votes into probabilities"); - ASSERT(params->leaf_payload_type != INT_CLASS_LABEL || - !(params->output & output_t::SIGMOID), - "SIGMOID does not make sense for class-vote-based classification"); - ASSERT(params->leaf_payload_type != INT_CLASS_LABEL || !params->global_bias, - "global_bias does not make sense for class-vote-based classification"); } // tl_node_at is a checked version of tree[i] @@ -542,7 +527,8 @@ void tl2fil_common(forest_params_t* params, const tl::Model& model, // assuming either all leaves use the .leaf_vector() or all leaves use .leaf_value() auto tree = 
model.trees[0]; int node_key; - for (node_key = tree_root(tree); !tl_node_at(tree, node_key).is_leaf(); + for (node_key = tree_root(tree); + !tl_node_at(tree, node_key).is_leaf(); node_key = tl_node_at(tree, node_key).cleft()) ; auto vec = tl_node_at(tree, node_key).leaf_vector(); @@ -554,7 +540,7 @@ void tl2fil_common(forest_params_t* params, const tl::Model& model, } else { printf("scalar leaves\n"); params->leaf_payload_type = FLOAT_SCALAR; - params->num_classes = tl_params->output_class ? 2 : 1; + params->num_classes = 0; // ignored } printf("@tl2fil_common leaf_payload_type == %d\n", params->leaf_payload_type); @@ -566,7 +552,7 @@ void tl2fil_common(forest_params_t* params, const tl::Model& model, params->output = output_t::RAW; if (tl_params->output_class) { if (params->leaf_payload_type == FLOAT_SCALAR) - params->output = output_t(params->output | output_t::THRESHOLD); + params->output = output_t(params->output | output_t::CLASS); } // "random forest" in treelite means tree output averaging if (model.random_forest_flag) { diff --git a/cpp/src/fil/infer.cu b/cpp/src/fil/infer.cu index 5628c13830..4f6737f57d 100644 --- a/cpp/src/fil/infer.cu +++ b/cpp/src/fil/infer.cu @@ -92,10 +92,8 @@ template // = FLOAT_SCALAR struct tree_aggregator_t { vec acc; - int num_output_classes; - __device__ __forceinline__ tree_aggregator_t(int num_output_classes_, void*) - : num_output_classes(num_output_classes_) {} + __device__ __forceinline__ tree_aggregator_t(int, void*) {} __device__ __forceinline__ void accumulate(vec out) { acc += out; } @@ -112,55 +110,40 @@ struct tree_aggregator_t { } } } - __device__ __forceinline__ void finalize_regression(float* out, - int num_rows) { - finalize(out, num_rows, 1); - } - __device__ __forceinline__ void finalize_class_proba(float* out, - int num_rows) { - finalize(out, num_rows, 2); - } - __device__ __forceinline__ void finalize_class_label(float* out, - int num_rows) { - finalize(out, num_rows, 1); - } }; template struct 
tree_aggregator_t { - typedef unsigned int class_label_t; - typedef unsigned int vote_count_t; + typedef unsigned class_label_t; + typedef unsigned vote_count_t; // could switch to unsigned short to save shared memory // provided atomicAdd(short*) simulated with appropriate shifts vote_count_t* votes; - int num_output_classes; + class_label_t num_classes; - __device__ __forceinline__ tree_aggregator_t(int num_output_classes_, + __device__ __forceinline__ tree_aggregator_t(int num_classes_, void* shared_workspace) - : votes((vote_count_t*)shared_workspace), - num_output_classes(num_output_classes_) { - for (int c = threadIdx.x; c < num_output_classes; c += FIL_TPB * NITEMS) + : num_classes(num_classes_), votes((vote_count_t*)shared_workspace) { + for (class_label_t c = threadIdx.x; c < num_classes; c += FIL_TPB * NITEMS) #pragma unroll - for (int i = 0; i < NITEMS; ++i) votes[c * NITEMS + i] = 0; - //__syncthreads(); // happening outside + for (int item = 0; item < NITEMS; ++item) votes[c * NITEMS + item] = 0; + //__syncthreads(); // happening outside already } __device__ __forceinline__ void accumulate(vec out) { #pragma unroll - for (int i = 0; i < NITEMS; ++i) atomicAdd(votes + out[i] * NITEMS + i, 1); + for (int item = 0; item < NITEMS; ++item) atomicAdd(votes + out[item] * NITEMS + item, 1); } - __device__ __forceinline__ void finalize_regression(float* out, - int num_rows) { - asm("trap;"); - } - __device__ __forceinline__ void finalize_class_proba(float* out, + // class probabilities or regression. 
for regression, num_classes + // is just the number of outputs for each data instance + __device__ __forceinline__ void finalize_multiple_outputs(float* out, int num_rows) { __syncthreads(); int item = threadIdx.x; int row = blockIdx.x * NITEMS + item; if (item < NITEMS && row < num_rows) { #pragma unroll - for (int c = 0; c < num_output_classes; ++c) - out[row * num_output_classes + c] = votes[c * NITEMS + item]; + for (int c = 0; c < num_classes; ++c) + out[row * num_classes + c] = votes[c * NITEMS + item]; } } // using this when predicting a single class label, as opposed to sparse class vector @@ -173,7 +156,7 @@ struct tree_aggregator_t { if (item < NITEMS && row < num_rows) { vote_count_t max_votes = 0; class_label_t best_class = 0; - for (int c = 0; c < num_output_classes; ++c) + for (int c = 0; c < num_classes; ++c) if (votes[c * NITEMS + item] > max_votes) { max_votes = votes[c * NITEMS + item]; best_class = c; @@ -181,6 +164,14 @@ struct tree_aggregator_t { out[row] = best_class; } } + __device__ __forceinline__ void finalize(float* out, int num_rows, + int num_outputs) { + if(num_outputs > 1) + // only supporting num_outputs == num_classes + finalize_multiple_outputs(out, num_rows); + else + finalize_class_label(out, num_rows); + } }; template @@ -198,7 +189,7 @@ __global__ void infer_k(storage_type forest, predict_params params) { } tree_aggregator_t acc( - params.num_output_classes, sdata + params.num_cols * NITEMS); + params.num_classes, sdata + params.num_cols * NITEMS); __syncthreads(); // for both row cache init and acc init @@ -208,21 +199,7 @@ __global__ void infer_k(storage_type forest, predict_params params) { acc.accumulate(infer_one_tree::T>( forest[j], sdata, params.num_cols)); } - // compute most probable class. in cuML RF, output is class label, - // hence, no-predicted class edge case doesn't apply - if (false && !threadIdx.x && !blockIdx.x) { - printf("%s\n", params.predict_proba - ? 
"finalize_class_proba" - : (params.num_output_classes > 1 ? "finalize_class_label" - : "finalize_regression")); - } - if (!params.predict_proba) { - if (params.num_output_classes > 1) - acc.finalize_class_label(params.preds, params.num_rows); - else - acc.finalize_regression(params.preds, params.num_rows); - } else - acc.finalize_class_proba(params.preds, params.num_rows); + acc.finalize(params.preds, params.num_rows, params.num_outputs); } template @@ -235,7 +212,7 @@ void infer_k_launcher(storage_type forest, predict_params params, int shared_mem_per_item = sizeof(float) * params.num_cols + // class vote histogram, while inferring trees (leaf_payload_type == INT_CLASS_LABEL - ? sizeof(int) * params.num_output_classes + ? sizeof(int) * params.num_classes : 0); // CUB workspace should fit itself, and we don't need // the row by the time CUB is used @@ -245,7 +222,7 @@ void infer_k_launcher(storage_type forest, predict_params params, ASSERT(false, "p.num_cols == %d: too many features, only %d allowed%s", params.num_cols, max_cols, leaf_payload_type == INT_CLASS_LABEL - ? "(accounting for shared class vote histogram)" + ? 
" (accounting for shared class vote histogram)" : ""); } num_items = std::min(num_items, params.max_items); @@ -278,10 +255,10 @@ void infer_k_launcher(storage_type forest, predict_params params, template void infer(storage_type forest, predict_params params, cudaStream_t stream) { - printf("infer::num_output_classes = %u\n", params.num_output_classes); + printf("infer::num_outputs = %u\n", params.num_outputs); switch (params.leaf_payload_type) { case FLOAT_SCALAR: - ASSERT(params.num_output_classes <= 2, + ASSERT(params.num_outputs <= 2, "wrong leaf payload for multi-class (>2) inference"); infer_k_launcher(forest, params, stream); break; diff --git a/cpp/test/sg/fil_test.cu b/cpp/test/sg/fil_test.cu index 28646d97a3..bd6fc38830 100644 --- a/cpp/test/sg/fil_test.cu +++ b/cpp/test/sg/fil_test.cu @@ -69,7 +69,7 @@ std::string output2str(fil::output_t output) { if (output == fil::RAW) return "RAW"; std::string s = ""; if (output & fil::AVG) s += "| AVG"; - if (output & fil::THRESHOLD) s += "| THRESHOLD"; + if (output & fil::CLASS) s += "| CLASS"; if (output & fil::SIGMOID) s += "| SIGMOID"; return s; } @@ -250,7 +250,7 @@ class BaseFilTest : public testing::TestWithParam { want_proba_h[i * 2] = 1.f - pred; want_proba_h[i * 2 + 1] = pred; } - if ((ps.output & fil::output_t::THRESHOLD) != 0) { + if ((ps.output & fil::output_t::CLASS) != 0) { pred = pred > ps.threshold ? 
1.0f : 0.0f; } want_preds_h[i] = pred; @@ -551,7 +551,7 @@ class TreeliteFilTest : public BaseFilTest { fil::treelite_params_t params; params.algo = ps.algo; params.threshold = ps.threshold; - params.output_class = (ps.output & fil::output_t::THRESHOLD) != 0; + params.output_class = (ps.output & fil::output_t::CLASS) != 0; params.storage_type = storage_type; fil::from_treelite(handle, pforest, (ModelHandle)model.get(), ¶ms); CUDA_CHECK(cudaStreamSynchronize(stream)); @@ -603,15 +603,15 @@ std::vector predict_dense_inputs = { fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR, 1}, // 06 {20000, 50, 0.05, 8, 50, 0.05, - fil::output_t(fil::output_t::SIGMOID | fil::output_t::THRESHOLD), 0, 0, + fil::output_t(fil::output_t::SIGMOID | fil::output_t::CLASS), 0, 0, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR, 2}, // 07 {20000, 50, 0.05, 8, 50, 0.05, - fil::output_t(fil::output_t::SIGMOID | fil::output_t::THRESHOLD), 0, 0, + fil::output_t(fil::output_t::SIGMOID | fil::output_t::CLASS), 0, 0, fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR, 2}, // 08 {20000, 50, 0.05, 8, 50, 0.05, - fil::output_t(fil::output_t::SIGMOID | fil::output_t::THRESHOLD), 0, 0, + fil::output_t(fil::output_t::SIGMOID | fil::output_t::CLASS), 0, 0, fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR, 2}, // 09 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::NAIVE, @@ -623,15 +623,15 @@ std::vector predict_dense_inputs = { fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR, 1}, // 11 {20000, 50, 0.05, 8, 50, 0.05, - fil::output_t(fil::output_t::AVG | fil::output_t::THRESHOLD), 0, 0, + fil::output_t(fil::output_t::AVG | fil::output_t::CLASS), 0, 0, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR, 2}, // 12 {20000, 50, 0.05, 8, 50, 0.05, - 
fil::output_t(fil::output_t::AVG | fil::output_t::THRESHOLD), 0, 0, + fil::output_t(fil::output_t::AVG | fil::output_t::CLASS), 0, 0, fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR, 2}, // 13 {20000, 50, 0.05, 8, 50, 0.05, - fil::output_t(fil::output_t::AVG | fil::output_t::THRESHOLD), 0, 0, + fil::output_t(fil::output_t::AVG | fil::output_t::CLASS), 0, 0, fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR, 2}, // 14 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0.5, @@ -643,7 +643,7 @@ std::vector predict_dense_inputs = { {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0.5, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR, 1}, // {20000, 50, 0.05, 8, 50, 0.05, - fil::output_t(fil::output_t::AVG | fil::output_t::THRESHOLD), 1.0, 0.5, + fil::output_t(fil::output_t::AVG | fil::output_t::CLASS), 1.0, 0.5, fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR, 2}, // 17 }; @@ -662,13 +662,13 @@ std::vector predict_sparse_inputs = { fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR, 1}, // 00 {20000, 50, 0.05, 8, 50, 0.05, - fil::output_t(fil::output_t::SIGMOID | fil::output_t::THRESHOLD), 0, 0, + fil::output_t(fil::output_t::SIGMOID | fil::output_t::CLASS), 0, 0, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR, 2}, // 01 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR, 1}, // {20000, 50, 0.05, 8, 50, 0.05, - fil::output_t(fil::output_t::AVG | fil::output_t::THRESHOLD), 0, 0.5, + fil::output_t(fil::output_t::AVG | fil::output_t::CLASS), 0, 0.5, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR, 2}, // 02 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0.5, fil::algo_t::NAIVE, @@ -679,11 +679,11 @@ std::vector 
predict_sparse_inputs = { {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0.5, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR, 1}, // {20000, 50, 0.05, 8, 50, 0.05, - fil::output_t(fil::output_t::AVG | fil::output_t::THRESHOLD), 1.0, 0.5, + fil::output_t(fil::output_t::AVG | fil::output_t::CLASS), 1.0, 0.5, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR, 2}, // 04 {20000, 50, 0.05, 8, 50, 0.05, - fil::output_t(fil::output_t::SIGMOID | fil::output_t::THRESHOLD), 0, 0, + fil::output_t(fil::output_t::SIGMOID | fil::output_t::CLASS), 0, 0, fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR, 2}, // 05 }; @@ -704,13 +704,13 @@ std::vector import_dense_inputs = { {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kLE, fil::leaf_value_t::INT_CLASS_LABEL, 4}, // 02 {20000, 50, 0.05, 8, 50, 0.05, - fil::output_t(fil::output_t::SIGMOID | fil::output_t::THRESHOLD), 0, 0, + fil::output_t(fil::output_t::SIGMOID | fil::output_t::CLASS), 0, 0, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kGT, fil::leaf_value_t::FLOAT_SCALAR, 2}, // 03 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kGE, fil::leaf_value_t::INT_CLASS_LABEL, 2}, // {20000, 50, 0.05, 8, 50, 0.05, - fil::output_t(fil::output_t::AVG | fil::output_t::THRESHOLD), 0, 0, + fil::output_t(fil::output_t::AVG | fil::output_t::CLASS), 0, 0, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::FLOAT_SCALAR, 2}, // 04 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, @@ -720,14 +720,14 @@ std::vector import_dense_inputs = { fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kGT, fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 06 {20000, 50, 0.05, 8, 50, 0.05, - fil::output_t(fil::output_t::THRESHOLD | fil::output_t::AVG), 0, 0, + fil::output_t(fil::output_t::CLASS | fil::output_t::AVG), 0, 0, 
fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kGE, fil::leaf_value_t::INT_CLASS_LABEL, 2}, // {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::FLOAT_SCALAR, 1}, // 07 {20000, 50, 0.05, 8, 50, 0.05, - fil::output_t(fil::output_t::AVG | fil::output_t::THRESHOLD), 0, 0, + fil::output_t(fil::output_t::AVG | fil::output_t::CLASS), 0, 0, fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kLE, fil::leaf_value_t::INT_CLASS_LABEL, 3}, // 08 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0, @@ -755,11 +755,11 @@ std::vector import_dense_inputs = { fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kGE, fil::leaf_value_t::FLOAT_SCALAR, 1}, // 16 {20000, 50, 0.05, 8, 50, 0.05, - fil::output_t(fil::output_t::THRESHOLD | fil::output_t::AVG), 0, 0, + fil::output_t(fil::output_t::CLASS | fil::output_t::AVG), 0, 0, fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 17 {20000, 50, 0.05, 8, 50, 0.05, - fil::output_t(fil::output_t::THRESHOLD | fil::output_t::AVG), 0, 0, + fil::output_t(fil::output_t::CLASS | fil::output_t::AVG), 0, 0, fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLE, fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 18 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, @@ -769,11 +769,11 @@ std::vector import_dense_inputs = { fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLE, fil::leaf_value_t::FLOAT_SCALAR, 1}, // 20 {20000, 50, 0.05, 8, 50, 0.05, - fil::output_t(fil::output_t::AVG | fil::output_t::THRESHOLD), 0, 0, + fil::output_t(fil::output_t::AVG | fil::output_t::CLASS), 0, 0, fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kGT, fil::leaf_value_t::FLOAT_SCALAR, 2}, // 21 {20000, 50, 0.05, 8, 50, 0.05, - fil::output_t(fil::output_t::AVG | fil::output_t::THRESHOLD), 0, 0, + fil::output_t(fil::output_t::AVG | fil::output_t::CLASS), 0, 0, fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, 
tl::Operator::kGE, fil::leaf_value_t::FLOAT_SCALAR, 2}, // 22 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, @@ -785,7 +785,7 @@ std::vector import_dense_inputs = { {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kGT, fil::leaf_value_t::INT_CLASS_LABEL, 3}, // 25 {20000, 50, 0.05, 8, 50, 0.05, - fil::output_t(fil::output_t::AVG | fil::output_t::THRESHOLD), 1.0, 0.5, + fil::output_t(fil::output_t::AVG | fil::output_t::CLASS), 1.0, 0.5, fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kGE, fil::leaf_value_t::FLOAT_SCALAR, 2}, // 26 }; @@ -807,13 +807,13 @@ std::vector import_sparse_inputs = { fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kLE, fil::leaf_value_t::FLOAT_SCALAR, 1}, // 02 {20000, 50, 0.05, 8, 50, 0.05, - fil::output_t(fil::output_t::SIGMOID | fil::output_t::THRESHOLD), 0, 0, + fil::output_t(fil::output_t::SIGMOID | fil::output_t::CLASS), 0, 0, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kGT, fil::leaf_value_t::FLOAT_SCALAR, 2}, // 03 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kGE, fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 04 {20000, 50, 0.05, 8, 50, 0.05, - fil::output_t(fil::output_t::AVG | fil::output_t::THRESHOLD), 0, 0, + fil::output_t(fil::output_t::AVG | fil::output_t::CLASS), 0, 0, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 05 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::NAIVE, @@ -824,7 +824,7 @@ std::vector import_sparse_inputs = { {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kGT, fil::leaf_value_t::INT_CLASS_LABEL, 3}, // 08 {20000, 50, 0.05, 8, 50, 0.05, - fil::output_t(fil::output_t::AVG | fil::output_t::THRESHOLD), 1.0, 0.5, + fil::output_t(fil::output_t::AVG | fil::output_t::CLASS), 1.0, 0.5, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kGE, fil::leaf_value_t::FLOAT_SCALAR, 2}, // 09 }; 
From e853444ed9d248101159f20011f3cb4c9fbe9adc Mon Sep 17 00:00:00 2001 From: Levs Dolgovs Date: Fri, 6 Mar 2020 21:54:41 -0800 Subject: [PATCH 046/330] reverted the tests to preserve more flag combos in FLOAT_SCALAR --- cpp/src/fil/fil.cu | 8 +- cpp/test/sg/fil_test.cu | 325 ++++++++++++++++++---------------------- 2 files changed, 155 insertions(+), 178 deletions(-) diff --git a/cpp/src/fil/fil.cu b/cpp/src/fil/fil.cu index e5ff9ae9c8..9f91dd42c0 100644 --- a/cpp/src/fil/fil.cu +++ b/cpp/src/fil/fil.cu @@ -81,11 +81,16 @@ __global__ void transform_k(float* preds, size_t n, output_t output, if (complement_proba && i % 2) return; float result = preds[i]; + if (!i) printf("gpu: tree_sum %f ", result); if ((output & output_t::AVG) != 0) result *= inv_num_trees; + if (!i) printf(" AVG %f ", result); result += global_bias; + if (!i) printf(" bias %f ", result); if ((output & output_t::SIGMOID) != 0) result = sigmoid(result); + if (!i) printf(" SIGMOID %f ", result); if ((output & output_t::CLASS) != 0) result = result > threshold ? 
1.0f : 0.0f; + if (!i) printf(" CLASS %f \n", result); // sklearn outputs numpy array in 'C' order, with the number of classes being last dimension // that is also the default order, so we should use the same one if (complement_proba) { @@ -163,7 +168,8 @@ struct forest { predict_proba && leaf_payload_type_ == FLOAT_SCALAR; bool do_transform = ot != output_t::RAW || global_bias_ != 0.0f || complement_proba; if (leaf_payload_type_ == INT_CLASS_LABEL && !predict_proba) - // since choosing best class and all transforms are monotonic + // moot since choosing best class and all transforms are monotonic + // also, would break current code do_transform = false; if (do_transform) { diff --git a/cpp/test/sg/fil_test.cu b/cpp/test/sg/fil_test.cu index bd6fc38830..44d9a251e1 100644 --- a/cpp/test/sg/fil_test.cu +++ b/cpp/test/sg/fil_test.cu @@ -120,10 +120,8 @@ class BaseFilTest : public testing::TestWithParam { CUDA_CHECK(cudaFree(preds_d)); CUDA_CHECK(cudaFree(want_preds_d)); CUDA_CHECK(cudaFree(data_d)); - if (ps.num_classes >= 2) { - CUDA_CHECK(cudaFree(want_proba_d)); - CUDA_CHECK(cudaFree(proba_d)); - } + CUDA_CHECK(cudaFree(want_proba_d)); + CUDA_CHECK(cudaFree(proba_d)); } void generate_forest() { @@ -232,7 +230,7 @@ class BaseFilTest : public testing::TestWithParam { void predict_on_cpu() { // predict on host std::vector want_preds_h(ps.num_rows); - std::vector want_proba_h(ps.num_rows * ps.num_classes); + std::vector want_proba_h(ps.num_rows * std::max(ps.num_classes, 2)); int num_nodes = tree_num_nodes(); switch (ps.leaf_payload_type) { case fil::leaf_value_t::FLOAT_SCALAR: @@ -242,17 +240,20 @@ class BaseFilTest : public testing::TestWithParam { pred += infer_one_tree(&nodes[j * num_nodes], &data_h[i * ps.num_cols]).f; } + if (!i) std::cout << "cpu: tree_sum " << pred; if ((ps.output & fil::output_t::AVG) != 0) pred = pred * (1.f / ps.num_trees); + if (!i) std::cout << " AVG " << pred; pred += ps.global_bias; + if (!i) std::cout << " bias " << pred; if ((ps.output 
& fil::output_t::SIGMOID) != 0) pred = sigmoid(pred); - if (ps.num_classes == 2) { - want_proba_h[i * 2] = 1.f - pred; - want_proba_h[i * 2 + 1] = pred; - } - if ((ps.output & fil::output_t::CLASS) != 0) { + if (!i) std::cout << " sigmoid " << pred; + want_proba_h[i * 2] = 1.f - pred; + want_proba_h[i * 2 + 1] = pred; + + if ((ps.output & fil::output_t::CLASS) != 0) pred = pred > ps.threshold ? 1.0f : 0.0f; - } + if (!i) std::cout << " CLASS " << pred; want_preds_h[i] = pred; } break; @@ -266,15 +267,23 @@ class BaseFilTest : public testing::TestWithParam { .idx; ++class_votes[class_label]; } - unsigned best_class = 0, most_votes = 0; + unsigned best_class = 0; + float most_votes = 0.; for (int c = 0; c < ps.num_classes; ++c) { - unsigned votes = class_votes[c]; - want_proba_h[r * ps.num_classes + c] = - votes * (1.0f / ps.num_trees); - if (votes > most_votes) { - most_votes = votes; + float pred = class_votes[c]; + if (!r && !c) std::cout << "cpu: tree_sum " << pred; + if (pred > most_votes) { + most_votes = pred; best_class = c; } + if ((ps.output & fil::output_t::AVG) != 0) + pred = pred * (1.f / ps.num_trees); + if (!r && !c) std::cout << " AVG " << pred; + pred += ps.global_bias; + if (!r && !c) std::cout << " bias " << pred; + if ((ps.output & fil::output_t::SIGMOID) != 0) pred = sigmoid(pred); + if (!r && !c) std::cout << " sigmoid " << pred << "\n"; + want_proba_h[r * ps.num_classes + c] = pred; } want_preds_h[r] = best_class; } @@ -284,11 +293,9 @@ class BaseFilTest : public testing::TestWithParam { // copy to GPU allocate(want_preds_d, ps.num_rows); updateDevice(want_preds_d, want_preds_h.data(), ps.num_rows, stream); - if (ps.num_classes >= 2) { - allocate(want_proba_d, ps.num_rows * ps.num_classes); - updateDevice(want_proba_d, want_proba_h.data(), - ps.num_rows * ps.num_classes, stream); - } + allocate(want_proba_d, ps.num_rows * std::max(ps.num_classes, 2)); + updateDevice(want_proba_d, want_proba_h.data(), + ps.num_rows * std::max(ps.num_classes, 2), 
stream); CUDA_CHECK(cudaStreamSynchronize(stream)); } @@ -301,10 +308,8 @@ class BaseFilTest : public testing::TestWithParam { // predict allocate(preds_d, ps.num_rows); fil::predict(handle, forest, preds_d, data_d, ps.num_rows); - if (ps.num_classes >= 2) { - allocate(proba_d, ps.num_rows * ps.num_classes); - fil::predict(handle, forest, proba_d, data_d, ps.num_rows, true); - } + allocate(proba_d, ps.num_rows * std::max(ps.num_classes, 2)); + fil::predict(handle, forest, proba_d, data_d, ps.num_rows, true); CUDA_CHECK(cudaStreamSynchronize(stream)); // cleanup @@ -312,11 +317,9 @@ class BaseFilTest : public testing::TestWithParam { } void compare() { - if (ps.num_classes >= 2) { - ASSERT_TRUE(devArrMatch(want_proba_d, proba_d, - ps.num_rows * ps.num_classes, - CompareApprox(ps.tolerance), stream)); - } + ASSERT_TRUE(devArrMatch(want_proba_d, proba_d, + ps.num_rows * std::max(ps.num_classes, 2), + CompareApprox(ps.tolerance), stream)); float tolerance = ps.leaf_payload_type == fil::leaf_value_t::FLOAT_SCALAR ? 
ps.tolerance : std::numeric_limits::epsilon(); @@ -582,70 +585,53 @@ class TreeliteAutoFilTest : public TreeliteFilTest { // rows, cols, nan_prob, depth, num_trees, leaf_prob, output, threshold, // global_bias, algo, seed, tolerance std::vector predict_dense_inputs = { - {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, - fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_t::FLOAT_SCALAR, 1}, // 00 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0, fil::algo_t::NAIVE, - 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR, 1}, // 01 + 42, 2e-3f, tl::Operator(0), fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0, - fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_t::FLOAT_SCALAR, 1}, // 02 + fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator(0), fil::leaf_value_t::INT_CLASS_LABEL, 5}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_t::FLOAT_SCALAR, 1}, // 03 + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator(0), fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, - fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_t::FLOAT_SCALAR, 1}, // 04 + fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator(0), fil::leaf_value_t::INT_CLASS_LABEL, 7}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, - fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_t::FLOAT_SCALAR, 1}, // 05 + fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator(0), fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_t::FLOAT_SCALAR, 1}, // 06 + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator(0), fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, 
fil::output_t(fil::output_t::SIGMOID | fil::output_t::CLASS), 0, 0, - fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_t::FLOAT_SCALAR, 2}, // 07 + fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator(0), fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::SIGMOID | fil::output_t::CLASS), 0, 0, - fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_t::FLOAT_SCALAR, 2}, // 08 + fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator(0), fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::SIGMOID | fil::output_t::CLASS), 0, 0, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_t::FLOAT_SCALAR, 2}, // 09 + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator(0), fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::NAIVE, - 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR, 1}, // + 42, 2e-3f, tl::Operator(0), fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, - fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_t::FLOAT_SCALAR, 1}, // 10 + fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator(0), fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_t::FLOAT_SCALAR, 1}, // 11 + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator(0), fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::CLASS), 0, 0, - fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_t::FLOAT_SCALAR, 2}, // 12 + fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator(0), fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::CLASS), 0, 0, - fil::algo_t::TREE_REORG, 42, 2e-3f, 
tl::Operator::kEQ, - fil::leaf_value_t::FLOAT_SCALAR, 2}, // 13 + fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator(0), fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::CLASS), 0, 0, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_t::FLOAT_SCALAR, 2}, // 14 + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator(0), fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0.5, - fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_t::FLOAT_SCALAR, 1}, // 15 + fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator(0), fil::leaf_value_t::INT_CLASS_LABEL, 4}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0.5, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_t::FLOAT_SCALAR, 1}, // 16 + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator(0), fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0.5, fil::algo_t::NAIVE, - 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR, 1}, // + 42, 2e-3f, tl::Operator(0), fil::leaf_value_t::INT_CLASS_LABEL, 4}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::CLASS), 1.0, 0.5, - fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_t::FLOAT_SCALAR, 2}, // 17 + fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator(0), fil::leaf_value_t::FLOAT_SCALAR, 0}, + {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, + fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator(0), fil::leaf_value_t::FLOAT_SCALAR, 0}, }; TEST_P(PredictDenseFilTest, Predict) { compare(); } @@ -657,35 +643,29 @@ INSTANTIATE_TEST_CASE_P(FilTests, PredictDenseFilTest, // global_bias, algo, seed, tolerance std::vector predict_sparse_inputs = { {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0, fil::algo_t::NAIVE, - 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR, 
1}, // + 42, 2e-3f, tl::Operator(0), fil::leaf_value_t::INT_CLASS_LABEL, 3}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, - fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_t::FLOAT_SCALAR, 1}, // 00 + fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator(0), fil::leaf_value_t::INT_CLASS_LABEL, 3}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::SIGMOID | fil::output_t::CLASS), 0, 0, - fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_t::FLOAT_SCALAR, 2}, // 01 + fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator(0), fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::NAIVE, - 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR, 1}, // + 42, 2e-3f, tl::Operator(0), fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::CLASS), 0, 0.5, - fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_t::FLOAT_SCALAR, 2}, // 02 + fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator(0), fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0.5, fil::algo_t::NAIVE, - 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR, 1}, // + 42, 2e-3f, tl::Operator(0), fil::leaf_value_t::INT_CLASS_LABEL, 6}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0.5, - fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_t::FLOAT_SCALAR, 1}, // 03 + fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator(0), fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0.5, fil::algo_t::NAIVE, - 42, 2e-3f, tl::Operator::kEQ, fil::leaf_value_t::FLOAT_SCALAR, 1}, // + 42, 2e-3f, tl::Operator(0), fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::CLASS), 1.0, 0.5, - fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_t::FLOAT_SCALAR, 2}, // 04 + 
fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator(0), fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::SIGMOID | fil::output_t::CLASS), 0, 0, - fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator::kEQ, - fil::leaf_value_t::FLOAT_SCALAR, 2}, // 05 + fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator(0), fil::leaf_value_t::FLOAT_SCALAR, 0}, }; TEST_P(PredictSparseFilTest, Predict) { compare(); } @@ -696,98 +676,89 @@ INSTANTIATE_TEST_CASE_P(FilTests, PredictSparseFilTest, // rows, cols, nan_prob, depth, num_trees, leaf_prob, output, threshold, // global_bias, algo, seed, tolerance std::vector import_dense_inputs = { - {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, - fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator::kLE, - fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 00 {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0, fil::algo_t::NAIVE, - 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::FLOAT_SCALAR, 1}, // 01 + 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::NAIVE, - 42, 2e-3f, tl::Operator::kLE, fil::leaf_value_t::INT_CLASS_LABEL, 4}, // 02 + 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::INT_CLASS_LABEL, 6}, + {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, + fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kLE, fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::SIGMOID | fil::output_t::CLASS), 0, 0, - fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kGT, - fil::leaf_value_t::FLOAT_SCALAR, 2}, // 03 + fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kGT, fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::NAIVE, - 42, 2e-3f, tl::Operator::kGE, fil::leaf_value_t::INT_CLASS_LABEL, 2}, // + 42, 2e-3f, tl::Operator::kGE, fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | 
fil::output_t::CLASS), 0, 0, - fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kLT, - fil::leaf_value_t::FLOAT_SCALAR, 2}, // 04 - {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, - fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kLE, - fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 05 + fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::FLOAT_SCALAR, 0}, + {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0, + fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kLE, fil::leaf_value_t::FLOAT_SCALAR, 0}, + {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, + fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kGT, fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, - fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kGT, - fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 06 + fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kLE, fil::leaf_value_t::INT_CLASS_LABEL, 7}, + {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::SIGMOID | fil::output_t::AVG), 0, 0, + fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kGT, fil::leaf_value_t::INT_CLASS_LABEL, 6}, {20000, 50, 0.05, 8, 50, 0.05, - fil::output_t(fil::output_t::CLASS | fil::output_t::AVG), 0, 0, - fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kGE, - fil::leaf_value_t::INT_CLASS_LABEL, 2}, // + fil::output_t(fil::output_t::SIGMOID | fil::output_t::CLASS), 0, 0, + fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kGE, fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, - fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kLT, - fil::leaf_value_t::FLOAT_SCALAR, 1}, // 07 + fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::CLASS), 0, 0, - fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kLE, - fil::leaf_value_t::INT_CLASS_LABEL, 3}, // 08 + fil::algo_t::TREE_REORG, 42, 2e-3f, 
tl::Operator::kLE, fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLT, - fil::leaf_value_t::FLOAT_SCALAR, 1}, // 09 - {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLT, - fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 10 + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::FLOAT_SCALAR, 0}, + {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLE, - fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 11 + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLE, fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLE, - fil::leaf_value_t::FLOAT_SCALAR, 1}, // 12 + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLE, fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kGT, - fil::leaf_value_t::FLOAT_SCALAR, 1}, // 13 + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kGT, fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kGT, - fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 14 + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLE, fil::leaf_value_t::INT_CLASS_LABEL, 5}, + {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::SIGMOID | fil::output_t::AVG), 0, 0, + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLE, fil::leaf_value_t::INT_CLASS_LABEL, 3}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, - 
fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kGE, - fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 15 + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kGT, fil::leaf_value_t::INT_CLASS_LABEL, 6}, + {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kGT, fil::leaf_value_t::FLOAT_SCALAR, 0}, + {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0, + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kGE, fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kGE, - fil::leaf_value_t::FLOAT_SCALAR, 1}, // 16 + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kGE, fil::leaf_value_t::FLOAT_SCALAR, 0}, + {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::SIGMOID | fil::output_t::AVG), 0, 0, + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kGE, fil::leaf_value_t::INT_CLASS_LABEL, 5}, {20000, 50, 0.05, 8, 50, 0.05, - fil::output_t(fil::output_t::CLASS | fil::output_t::AVG), 0, 0, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLT, - fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 17 + fil::output_t(fil::output_t::SIGMOID | fil::output_t::CLASS), 0, 0, + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, - fil::output_t(fil::output_t::CLASS | fil::output_t::AVG), 0, 0, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLE, - fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 18 + fil::output_t(fil::output_t::SIGMOID | fil::output_t::CLASS), 0, 0, + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLE, fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLT, - fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 19 + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLT, 
fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLE, - fil::leaf_value_t::FLOAT_SCALAR, 1}, // 20 + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLE, fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::CLASS), 0, 0, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kGT, - fil::leaf_value_t::FLOAT_SCALAR, 2}, // 21 + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kGT, fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::CLASS), 0, 0, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kGE, - fil::leaf_value_t::FLOAT_SCALAR, 2}, // 22 - {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, - fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kLT, - fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 23 - {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLE, - fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 24 - {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::NAIVE, - 42, 2e-3f, tl::Operator::kGT, fil::leaf_value_t::INT_CLASS_LABEL, 3}, // 25 + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kGE, fil::leaf_value_t::FLOAT_SCALAR, 0}, + {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0.5, + fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::FLOAT_SCALAR, 0}, + {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0.5, + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLE, fil::leaf_value_t::FLOAT_SCALAR, 0}, + {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::SIGMOID | fil::output_t::AVG), 0, 0.5, + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLE, fil::leaf_value_t::INT_CLASS_LABEL, 4}, + {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0.5, 
fil::algo_t::NAIVE, + 42, 2e-3f, tl::Operator::kGT, fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::CLASS), 1.0, 0.5, - fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kGE, - fil::leaf_value_t::FLOAT_SCALAR, 2}, // 26 + fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kGE, fil::leaf_value_t::FLOAT_SCALAR, 0}, + {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, + fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator::kLE, fil::leaf_value_t::FLOAT_SCALAR, 0}, }; TEST_P(TreeliteDenseFilTest, Import) { compare(); } @@ -798,35 +769,35 @@ INSTANTIATE_TEST_CASE_P(FilTests, TreeliteDenseFilTest, // rows, cols, nan_prob, depth, num_trees, leaf_prob, output, threshold, // global_bias, algo, seed, tolerance std::vector import_sparse_inputs = { - {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, - fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator::kLT, - fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 00 - {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::NAIVE, - 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 01 + {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0, fil::algo_t::NAIVE, + 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, - fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kLE, - fil::leaf_value_t::FLOAT_SCALAR, 1}, // 02 + fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kLE, fil::leaf_value_t::FLOAT_SCALAR, 0}, + {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::SIGMOID | fil::output_t::AVG), 0, 0, + fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kLE, fil::leaf_value_t::INT_CLASS_LABEL, 5}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::SIGMOID | fil::output_t::CLASS), 0, 0, - fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kGT, - fil::leaf_value_t::FLOAT_SCALAR, 2}, // 03 + fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kGT, 
fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::NAIVE, - 42, 2e-3f, tl::Operator::kGE, fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 04 + 42, 2e-3f, tl::Operator::kGE, fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::CLASS), 0, 0, - fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kLT, - fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 05 - {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::NAIVE, - 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 06 + fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::FLOAT_SCALAR, 0}, + {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0.5, fil::algo_t::NAIVE, + 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::FLOAT_SCALAR, 0}, + {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0.5, fil::algo_t::NAIVE, + 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::INT_CLASS_LABEL, 3}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0.5, - fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kLE, - fil::leaf_value_t::FLOAT_SCALAR, 1}, // 07 - {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::NAIVE, - 42, 2e-3f, tl::Operator::kGT, fil::leaf_value_t::INT_CLASS_LABEL, 3}, // 08 + fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kLE, fil::leaf_value_t::FLOAT_SCALAR, 0}, + {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0.5, fil::algo_t::NAIVE, + 42, 2e-3f, tl::Operator::kGT, fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::CLASS), 1.0, 0.5, - fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kGE, - fil::leaf_value_t::FLOAT_SCALAR, 2}, // 09 + fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kGE, fil::leaf_value_t::FLOAT_SCALAR, 0}, + {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, + fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::INT_CLASS_LABEL, 
4}, + {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0, + fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::FLOAT_SCALAR, 0}, }; TEST_P(TreeliteSparseFilTest, Import) { compare(); } @@ -837,18 +808,18 @@ INSTANTIATE_TEST_CASE_P(FilTests, TreeliteSparseFilTest, // rows, cols, nan_prob, depth, num_trees, leaf_prob, output, threshold, // global_bias, algo, seed, tolerance std::vector import_auto_inputs = { + {20000, 50, 0.05, 10, 50, 0.05, fil::output_t::AVG, 0, 0, + fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::INT_CLASS_LABEL, 3}, {20000, 50, 0.05, 10, 50, 0.05, fil::output_t::RAW, 0, 0, - fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator::kLT, - fil::leaf_value_t::FLOAT_SCALAR, 1}, // 00 - {20000, 50, 0.05, 15, 50, 0.05, fil::output_t::AVG, 0, 0, - fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator::kLT, - fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 01 + fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::FLOAT_SCALAR, 0}, + {20000, 50, 0.05, 15, 50, 0.05, fil::output_t::RAW, 0, 0, + fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::FLOAT_SCALAR, 0}, + {20000, 50, 0.05, 19, 50, 0.05, fil::output_t::RAW, 0, 0, + fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 19, 50, 0.05, fil::output_t::AVG, 0, 0, - fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator::kLT, - fil::leaf_value_t::INT_CLASS_LABEL, 2}, // 02 + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::INT_CLASS_LABEL, 6}, {20000, 50, 0.05, 19, 50, 0.05, fil::output_t::RAW, 0, 0, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLT, - fil::leaf_value_t::FLOAT_SCALAR, 1}, // 03 + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::FLOAT_SCALAR, 0}, }; TEST_P(TreeliteAutoFilTest, Import) { compare(); } From 2555f1362d7dd1db6ed40a722702b120f7ef90db Mon Sep 17 00:00:00 2001 From: Levs Dolgovs Date: 
Fri, 6 Mar 2020 22:25:30 -0800 Subject: [PATCH 047/330] removed debug prints --- cpp/src/fil/fil.cu | 16 ---------------- cpp/src/fil/infer.cu | 3 --- cpp/test/sg/fil_test.cu | 16 ---------------- 3 files changed, 35 deletions(-) diff --git a/cpp/src/fil/fil.cu b/cpp/src/fil/fil.cu index 9f91dd42c0..365f93fc4f 100644 --- a/cpp/src/fil/fil.cu +++ b/cpp/src/fil/fil.cu @@ -81,16 +81,11 @@ __global__ void transform_k(float* preds, size_t n, output_t output, if (complement_proba && i % 2) return; float result = preds[i]; - if (!i) printf("gpu: tree_sum %f ", result); if ((output & output_t::AVG) != 0) result *= inv_num_trees; - if (!i) printf(" AVG %f ", result); result += global_bias; - if (!i) printf(" bias %f ", result); if ((output & output_t::SIGMOID) != 0) result = sigmoid(result); - if (!i) printf(" SIGMOID %f ", result); if ((output & output_t::CLASS) != 0) result = result > threshold ? 1.0f : 0.0f; - if (!i) printf(" CLASS %f \n", result); // sklearn outputs numpy array in 'C' order, with the number of classes being last dimension // that is also the default order, so we should use the same one if (complement_proba) { @@ -121,8 +116,6 @@ struct forest { threshold_ = params->threshold; global_bias_ = params->global_bias; leaf_payload_type_ = params->leaf_payload_type; - printf("@forest init_common leaf_payload_type_ == %d\n", - leaf_payload_type_); num_classes_ = params->num_classes; init_max_shm(); } @@ -149,11 +142,6 @@ struct forest { // if class probabilities are being requested // assuming predict(..., predict_proba=true) will not get called // for regression, hence forest::num_classes == 2 - printf( - "predict_proba = %s, forest::num_classes = %d, " - "predict_params.num_outputs = %d\n", - predict_proba ? "true" : "false", num_classes_, - params.num_outputs); params.leaf_payload_type = leaf_payload_type_; // Predict using the forest. 
@@ -176,7 +164,6 @@ struct forest { unsigned long values_to_transform = (unsigned long) num_rows * (unsigned long) params.num_outputs; - printf("global_bias = %f\n", global_bias_); transform_k<<>>(preds, values_to_transform, ot, num_trees_ > 0 ? (1.0f / num_trees_) : 1.0f, @@ -542,13 +529,10 @@ void tl2fil_common(forest_params_t* params, const tl::Model& model, params->num_classes = vec.size(); ASSERT(vec.size() == model.num_output_group, "treelite model inconsistent"); params->leaf_payload_type = INT_CLASS_LABEL; - printf("detected %lu-class classification model \n", vec.size()); } else { - printf("scalar leaves\n"); params->leaf_payload_type = FLOAT_SCALAR; params->num_classes = 0; // ignored } - printf("@tl2fil_common leaf_payload_type == %d\n", params->leaf_payload_type); // fill in forest-dependent params params->num_cols = model.num_feature; diff --git a/cpp/src/fil/infer.cu b/cpp/src/fil/infer.cu index 4f6737f57d..458f3d99f5 100644 --- a/cpp/src/fil/infer.cu +++ b/cpp/src/fil/infer.cu @@ -228,8 +228,6 @@ void infer_k_launcher(storage_type forest, predict_params params, num_items = std::min(num_items, params.max_items); int num_blocks = ceildiv(int(params.num_rows), num_items); int shm_sz = num_items * shared_mem_per_item; - std::cout << "num_items " << num_items << " num_blocks " << num_blocks - << " shm_sz " << shm_sz << "\n"; switch (num_items) { case 1: infer_k<1, leaf_payload_type> @@ -255,7 +253,6 @@ void infer_k_launcher(storage_type forest, predict_params params, template void infer(storage_type forest, predict_params params, cudaStream_t stream) { - printf("infer::num_outputs = %u\n", params.num_outputs); switch (params.leaf_payload_type) { case FLOAT_SCALAR: ASSERT(params.num_outputs <= 2, diff --git a/cpp/test/sg/fil_test.cu b/cpp/test/sg/fil_test.cu index 44d9a251e1..0b2efb009d 100644 --- a/cpp/test/sg/fil_test.cu +++ b/cpp/test/sg/fil_test.cu @@ -240,20 +240,15 @@ class BaseFilTest : public testing::TestWithParam { pred += infer_one_tree(&nodes[j 
* num_nodes], &data_h[i * ps.num_cols]).f; } - if (!i) std::cout << "cpu: tree_sum " << pred; if ((ps.output & fil::output_t::AVG) != 0) pred = pred * (1.f / ps.num_trees); - if (!i) std::cout << " AVG " << pred; pred += ps.global_bias; - if (!i) std::cout << " bias " << pred; if ((ps.output & fil::output_t::SIGMOID) != 0) pred = sigmoid(pred); - if (!i) std::cout << " sigmoid " << pred; want_proba_h[i * 2] = 1.f - pred; want_proba_h[i * 2 + 1] = pred; if ((ps.output & fil::output_t::CLASS) != 0) pred = pred > ps.threshold ? 1.0f : 0.0f; - if (!i) std::cout << " CLASS " << pred; want_preds_h[i] = pred; } break; @@ -271,18 +266,14 @@ class BaseFilTest : public testing::TestWithParam { float most_votes = 0.; for (int c = 0; c < ps.num_classes; ++c) { float pred = class_votes[c]; - if (!r && !c) std::cout << "cpu: tree_sum " << pred; if (pred > most_votes) { most_votes = pred; best_class = c; } if ((ps.output & fil::output_t::AVG) != 0) pred = pred * (1.f / ps.num_trees); - if (!r && !c) std::cout << " AVG " << pred; pred += ps.global_bias; - if (!r && !c) std::cout << " bias " << pred; if ((ps.output & fil::output_t::SIGMOID) != 0) pred = sigmoid(pred); - if (!r && !c) std::cout << " sigmoid " << pred << "\n"; want_proba_h[r * ps.num_classes + c] = pred; } want_preds_h[r] = best_class; @@ -510,18 +501,11 @@ class TreeliteFilTest : public BaseFilTest { void init_forest_impl(fil::forest_t* pforest, fil::storage_type_t storage_type) { bool random_forest_flag = (ps.output & fil::output_t::AVG) != 0; - printf("%s && %s == %s\n", (ps.output & fil::output_t::AVG) ? "at" : "af", - (ps.leaf_payload_type == fil::leaf_value_t::INT_CLASS_LABEL) ? "pt" - : "pf", - random_forest_flag ? "ft" : "ff"); int treelite_num_classes = ps.leaf_payload_type == fil::leaf_value_t::FLOAT_SCALAR ? 
1 : ps.num_classes; std::unique_ptr model_builder(new tlf::ModelBuilder( ps.num_cols, treelite_num_classes, random_forest_flag)); - printf( - "ModelBuilder(num_cols, num_classes = %d, random_forest_flag = %s)\n", - treelite_num_classes, random_forest_flag ? "true" : "false"); // prediction transform if ((ps.output & fil::output_t::SIGMOID) != 0) { From 54f293b3fa4a85c067779e94fce71a5c4e29ec1d Mon Sep 17 00:00:00 2001 From: Levs Dolgovs Date: Fri, 6 Mar 2020 22:48:28 -0800 Subject: [PATCH 048/330] fix formatting --- cpp/src/fil/fil.cu | 23 ++-- cpp/src/fil/infer.cu | 17 +-- cpp/test/sg/fil_test.cu | 231 ++++++++++++++++++++++++++-------------- 3 files changed, 171 insertions(+), 100 deletions(-) diff --git a/cpp/src/fil/fil.cu b/cpp/src/fil/fil.cu index 365f93fc4f..616ff10758 100644 --- a/cpp/src/fil/fil.cu +++ b/cpp/src/fil/fil.cu @@ -134,9 +134,8 @@ struct forest { params.max_shm = max_shm_; params.num_classes = num_classes_; params.num_outputs = - predict_proba - ? (leaf_payload_type_ == INT_CLASS_LABEL ? num_classes_ : 2) - : 1; + predict_proba ? (leaf_payload_type_ == INT_CLASS_LABEL ? num_classes_ : 2) + : 1; // FLOAT_SCALAR means inference produces 1 class score/component and // transform_k might complement to 2 for classification, // if class probabilities are being requested @@ -151,19 +150,18 @@ struct forest { // Transform the output if necessary. 
output_t ot = output_; if (predict_proba) - ot = output_t(ot & ~output_t::CLASS); // no threshold on probabilities - bool complement_proba = - predict_proba && leaf_payload_type_ == FLOAT_SCALAR; - bool do_transform = ot != output_t::RAW || global_bias_ != 0.0f || complement_proba; + ot = output_t(ot & ~output_t::CLASS); // no threshold on probabilities + bool complement_proba = predict_proba && leaf_payload_type_ == FLOAT_SCALAR; + bool do_transform = + ot != output_t::RAW || global_bias_ != 0.0f || complement_proba; if (leaf_payload_type_ == INT_CLASS_LABEL && !predict_proba) // moot since choosing best class and all transforms are monotonic // also, would break current code do_transform = false; if (do_transform) { - - unsigned long values_to_transform = - (unsigned long) num_rows * (unsigned long) params.num_outputs; + unsigned long values_to_transform = + (unsigned long)num_rows * (unsigned long)params.num_outputs; transform_k<<>>(preds, values_to_transform, ot, num_trees_ > 0 ? (1.0f / num_trees_) : 1.0f, @@ -520,8 +518,7 @@ void tl2fil_common(forest_params_t* params, const tl::Model& model, // assuming either all leaves use the .leaf_vector() or all leaves use .leaf_value() auto tree = model.trees[0]; int node_key; - for (node_key = tree_root(tree); - !tl_node_at(tree, node_key).is_leaf(); + for (node_key = tree_root(tree); !tl_node_at(tree, node_key).is_leaf(); node_key = tl_node_at(tree, node_key).cleft()) ; auto vec = tl_node_at(tree, node_key).leaf_vector(); @@ -531,7 +528,7 @@ void tl2fil_common(forest_params_t* params, const tl::Model& model, params->leaf_payload_type = INT_CLASS_LABEL; } else { params->leaf_payload_type = FLOAT_SCALAR; - params->num_classes = 0; // ignored + params->num_classes = 0; // ignored } // fill in forest-dependent params diff --git a/cpp/src/fil/infer.cu b/cpp/src/fil/infer.cu index 458f3d99f5..7e2d3e8bf9 100644 --- a/cpp/src/fil/infer.cu +++ b/cpp/src/fil/infer.cu @@ -131,12 +131,13 @@ struct tree_aggregator_t { } __device__ 
__forceinline__ void accumulate(vec out) { #pragma unroll - for (int item = 0; item < NITEMS; ++item) atomicAdd(votes + out[item] * NITEMS + item, 1); + for (int item = 0; item < NITEMS; ++item) + atomicAdd(votes + out[item] * NITEMS + item, 1); } // class probabilities or regression. for regression, num_classes // is just the number of outputs for each data instance __device__ __forceinline__ void finalize_multiple_outputs(float* out, - int num_rows) { + int num_rows) { __syncthreads(); int item = threadIdx.x; int row = blockIdx.x * NITEMS + item; @@ -166,7 +167,7 @@ struct tree_aggregator_t { } __device__ __forceinline__ void finalize(float* out, int num_rows, int num_outputs) { - if(num_outputs > 1) + if (num_outputs > 1) // only supporting num_outputs == num_classes finalize_multiple_outputs(out, num_rows); else @@ -209,11 +210,11 @@ void infer_k_launcher(storage_type forest, predict_params params, params.max_items = params.algo == algo_t::BATCH_TREE_REORG ? MAX_BATCH_ITEMS : 1; - int shared_mem_per_item = sizeof(float) * params.num_cols + - // class vote histogram, while inferring trees - (leaf_payload_type == INT_CLASS_LABEL - ? sizeof(int) * params.num_classes - : 0); + int shared_mem_per_item = + sizeof(float) * params.num_cols + + // class vote histogram, while inferring trees + (leaf_payload_type == INT_CLASS_LABEL ? sizeof(int) * params.num_classes + : 0); // CUB workspace should fit itself, and we don't need // the row by the time CUB is used int num_items = params.max_shm / shared_mem_per_item; diff --git a/cpp/test/sg/fil_test.cu b/cpp/test/sg/fil_test.cu index 0b2efb009d..fbce0aa4b9 100644 --- a/cpp/test/sg/fil_test.cu +++ b/cpp/test/sg/fil_test.cu @@ -246,7 +246,7 @@ class BaseFilTest : public testing::TestWithParam { if ((ps.output & fil::output_t::SIGMOID) != 0) pred = sigmoid(pred); want_proba_h[i * 2] = 1.f - pred; want_proba_h[i * 2 + 1] = pred; - + if ((ps.output & fil::output_t::CLASS) != 0) pred = pred > ps.threshold ? 
1.0f : 0.0f; want_preds_h[i] = pred; @@ -329,9 +329,7 @@ class BaseFilTest : public testing::TestWithParam { for (;;) { fil::dense_node_decode(&root[curr], &output, &threshold, &fid, &def_left, &is_leaf); - if (is_leaf) { - break; - } + if (is_leaf) break; float val = data[fid]; bool cond = isnan(val) ? !def_left : val >= threshold; curr = (curr << 1) + 1 + (cond ? 1 : 0); @@ -572,50 +570,67 @@ std::vector predict_dense_inputs = { {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator(0), fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0, - fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator(0), fil::leaf_value_t::INT_CLASS_LABEL, 5}, + fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator(0), + fil::leaf_value_t::INT_CLASS_LABEL, 5}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator(0), fil::leaf_value_t::FLOAT_SCALAR, 0}, + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator(0), + fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, - fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator(0), fil::leaf_value_t::INT_CLASS_LABEL, 7}, + fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator(0), + fil::leaf_value_t::INT_CLASS_LABEL, 7}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, - fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator(0), fil::leaf_value_t::FLOAT_SCALAR, 0}, + fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator(0), + fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator(0), fil::leaf_value_t::FLOAT_SCALAR, 0}, + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator(0), + fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::SIGMOID | fil::output_t::CLASS), 0, 0, - fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator(0), 
fil::leaf_value_t::FLOAT_SCALAR, 0}, + fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator(0), + fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::SIGMOID | fil::output_t::CLASS), 0, 0, - fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator(0), fil::leaf_value_t::FLOAT_SCALAR, 0}, + fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator(0), + fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::SIGMOID | fil::output_t::CLASS), 0, 0, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator(0), fil::leaf_value_t::FLOAT_SCALAR, 0}, + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator(0), + fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator(0), fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, - fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator(0), fil::leaf_value_t::FLOAT_SCALAR, 0}, + fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator(0), + fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator(0), fil::leaf_value_t::FLOAT_SCALAR, 0}, + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator(0), + fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::CLASS), 0, 0, - fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator(0), fil::leaf_value_t::FLOAT_SCALAR, 0}, + fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator(0), + fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::CLASS), 0, 0, - fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator(0), fil::leaf_value_t::FLOAT_SCALAR, 0}, + fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator(0), + fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::CLASS), 0, 0, - 
fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator(0), fil::leaf_value_t::FLOAT_SCALAR, 0}, + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator(0), + fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0.5, - fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator(0), fil::leaf_value_t::INT_CLASS_LABEL, 4}, + fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator(0), + fil::leaf_value_t::INT_CLASS_LABEL, 4}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0.5, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator(0), fil::leaf_value_t::FLOAT_SCALAR, 0}, + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator(0), + fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0.5, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator(0), fil::leaf_value_t::INT_CLASS_LABEL, 4}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::CLASS), 1.0, 0.5, - fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator(0), fil::leaf_value_t::FLOAT_SCALAR, 0}, + fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator(0), + fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, - fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator(0), fil::leaf_value_t::FLOAT_SCALAR, 0}, + fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator(0), + fil::leaf_value_t::FLOAT_SCALAR, 0}, }; TEST_P(PredictDenseFilTest, Predict) { compare(); } @@ -629,27 +644,33 @@ std::vector predict_sparse_inputs = { {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator(0), fil::leaf_value_t::INT_CLASS_LABEL, 3}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, - fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator(0), fil::leaf_value_t::INT_CLASS_LABEL, 3}, + fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator(0), + fil::leaf_value_t::INT_CLASS_LABEL, 3}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::SIGMOID | fil::output_t::CLASS), 0, 0, - 
fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator(0), fil::leaf_value_t::FLOAT_SCALAR, 0}, + fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator(0), + fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator(0), fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::CLASS), 0, 0.5, - fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator(0), fil::leaf_value_t::FLOAT_SCALAR, 0}, + fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator(0), + fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0.5, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator(0), fil::leaf_value_t::INT_CLASS_LABEL, 6}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0.5, - fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator(0), fil::leaf_value_t::FLOAT_SCALAR, 0}, + fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator(0), + fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0.5, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator(0), fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::CLASS), 1.0, 0.5, - fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator(0), fil::leaf_value_t::FLOAT_SCALAR, 0}, + fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator(0), + fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::SIGMOID | fil::output_t::CLASS), 0, 0, - fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator(0), fil::leaf_value_t::FLOAT_SCALAR, 0}, + fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator(0), + fil::leaf_value_t::FLOAT_SCALAR, 0}, }; TEST_P(PredictSparseFilTest, Predict) { compare(); } @@ -665,84 +686,121 @@ std::vector import_dense_inputs = { {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::INT_CLASS_LABEL, 6}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, 
- fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kLE, fil::leaf_value_t::FLOAT_SCALAR, 0}, + fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kLE, + fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::SIGMOID | fil::output_t::CLASS), 0, 0, - fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kGT, fil::leaf_value_t::FLOAT_SCALAR, 0}, + fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kGT, + fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kGE, fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::CLASS), 0, 0, - fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::FLOAT_SCALAR, 0}, + fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kLT, + fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0, - fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kLE, fil::leaf_value_t::FLOAT_SCALAR, 0}, + fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kLE, + fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, - fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kGT, fil::leaf_value_t::FLOAT_SCALAR, 0}, + fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kGT, + fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, - fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kLE, fil::leaf_value_t::INT_CLASS_LABEL, 7}, - {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::SIGMOID | fil::output_t::AVG), 0, 0, - fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kGT, fil::leaf_value_t::INT_CLASS_LABEL, 6}, + fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kLE, + fil::leaf_value_t::INT_CLASS_LABEL, 7}, + {20000, 50, 0.05, 8, 50, 0.05, + fil::output_t(fil::output_t::SIGMOID | fil::output_t::AVG), 0, 0, + fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kGT, + 
fil::leaf_value_t::INT_CLASS_LABEL, 6}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::SIGMOID | fil::output_t::CLASS), 0, 0, - fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kGE, fil::leaf_value_t::FLOAT_SCALAR, 0}, + fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kGE, + fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, - fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::FLOAT_SCALAR, 0}, + fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kLT, + fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::CLASS), 0, 0, - fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kLE, fil::leaf_value_t::FLOAT_SCALAR, 0}, + fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kLE, + fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::FLOAT_SCALAR, 0}, + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLT, + fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::FLOAT_SCALAR, 0}, + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLT, + fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLE, fil::leaf_value_t::FLOAT_SCALAR, 0}, + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLE, + fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLE, fil::leaf_value_t::FLOAT_SCALAR, 0}, + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLE, + fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0, - fil::algo_t::BATCH_TREE_REORG, 42, 
2e-3f, tl::Operator::kGT, fil::leaf_value_t::FLOAT_SCALAR, 0}, + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kGT, + fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLE, fil::leaf_value_t::INT_CLASS_LABEL, 5}, - {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::SIGMOID | fil::output_t::AVG), 0, 0, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLE, fil::leaf_value_t::INT_CLASS_LABEL, 3}, + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLE, + fil::leaf_value_t::INT_CLASS_LABEL, 5}, + {20000, 50, 0.05, 8, 50, 0.05, + fil::output_t(fil::output_t::SIGMOID | fil::output_t::AVG), 0, 0, + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLE, + fil::leaf_value_t::INT_CLASS_LABEL, 3}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kGT, fil::leaf_value_t::INT_CLASS_LABEL, 6}, + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kGT, + fil::leaf_value_t::INT_CLASS_LABEL, 6}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kGT, fil::leaf_value_t::FLOAT_SCALAR, 0}, + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kGT, + fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kGE, fil::leaf_value_t::FLOAT_SCALAR, 0}, + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kGE, + fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kGE, fil::leaf_value_t::FLOAT_SCALAR, 0}, - {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::SIGMOID | fil::output_t::AVG), 0, 0, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kGE, fil::leaf_value_t::INT_CLASS_LABEL, 5}, + 
fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kGE, + fil::leaf_value_t::FLOAT_SCALAR, 0}, + {20000, 50, 0.05, 8, 50, 0.05, + fil::output_t(fil::output_t::SIGMOID | fil::output_t::AVG), 0, 0, + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kGE, + fil::leaf_value_t::INT_CLASS_LABEL, 5}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::SIGMOID | fil::output_t::CLASS), 0, 0, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::FLOAT_SCALAR, 0}, + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLT, + fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::SIGMOID | fil::output_t::CLASS), 0, 0, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLE, fil::leaf_value_t::FLOAT_SCALAR, 0}, + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLE, + fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::FLOAT_SCALAR, 0}, + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLT, + fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLE, fil::leaf_value_t::FLOAT_SCALAR, 0}, + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLE, + fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::CLASS), 0, 0, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kGT, fil::leaf_value_t::FLOAT_SCALAR, 0}, + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kGT, + fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::CLASS), 0, 0, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kGE, fil::leaf_value_t::FLOAT_SCALAR, 0}, + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kGE, + 
fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0.5, - fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::FLOAT_SCALAR, 0}, + fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kLT, + fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0.5, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLE, fil::leaf_value_t::FLOAT_SCALAR, 0}, - {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::SIGMOID | fil::output_t::AVG), 0, 0.5, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLE, fil::leaf_value_t::INT_CLASS_LABEL, 4}, + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLE, + fil::leaf_value_t::FLOAT_SCALAR, 0}, + {20000, 50, 0.05, 8, 50, 0.05, + fil::output_t(fil::output_t::SIGMOID | fil::output_t::AVG), 0, 0.5, + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLE, + fil::leaf_value_t::INT_CLASS_LABEL, 4}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0.5, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kGT, fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::CLASS), 1.0, 0.5, - fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kGE, fil::leaf_value_t::FLOAT_SCALAR, 0}, + fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kGE, + fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, - fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator::kLE, fil::leaf_value_t::FLOAT_SCALAR, 0}, + fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator::kLE, + fil::leaf_value_t::FLOAT_SCALAR, 0}, }; TEST_P(TreeliteDenseFilTest, Import) { compare(); } @@ -756,32 +814,41 @@ std::vector import_sparse_inputs = { {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, - fil::algo_t::NAIVE, 42, 2e-3f, 
tl::Operator::kLE, fil::leaf_value_t::FLOAT_SCALAR, 0}, - {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::SIGMOID | fil::output_t::AVG), 0, 0, - fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kLE, fil::leaf_value_t::INT_CLASS_LABEL, 5}, + fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kLE, + fil::leaf_value_t::FLOAT_SCALAR, 0}, + {20000, 50, 0.05, 8, 50, 0.05, + fil::output_t(fil::output_t::SIGMOID | fil::output_t::AVG), 0, 0, + fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kLE, + fil::leaf_value_t::INT_CLASS_LABEL, 5}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::SIGMOID | fil::output_t::CLASS), 0, 0, - fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kGT, fil::leaf_value_t::FLOAT_SCALAR, 0}, + fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kGT, + fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kGE, fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::CLASS), 0, 0, - fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::FLOAT_SCALAR, 0}, + fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kLT, + fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0.5, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0.5, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::INT_CLASS_LABEL, 3}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0.5, - fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kLE, fil::leaf_value_t::FLOAT_SCALAR, 0}, + fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kLE, + fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0.5, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kGT, fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | 
fil::output_t::CLASS), 1.0, 0.5, - fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kGE, fil::leaf_value_t::FLOAT_SCALAR, 0}, + fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kGE, + fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, - fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::INT_CLASS_LABEL, 4}, + fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator::kLT, + fil::leaf_value_t::INT_CLASS_LABEL, 4}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0, - fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::FLOAT_SCALAR, 0}, + fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator::kLT, + fil::leaf_value_t::FLOAT_SCALAR, 0}, }; TEST_P(TreeliteSparseFilTest, Import) { compare(); } @@ -793,17 +860,23 @@ INSTANTIATE_TEST_CASE_P(FilTests, TreeliteSparseFilTest, // global_bias, algo, seed, tolerance std::vector import_auto_inputs = { {20000, 50, 0.05, 10, 50, 0.05, fil::output_t::AVG, 0, 0, - fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::INT_CLASS_LABEL, 3}, + fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator::kLT, + fil::leaf_value_t::INT_CLASS_LABEL, 3}, {20000, 50, 0.05, 10, 50, 0.05, fil::output_t::RAW, 0, 0, - fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::FLOAT_SCALAR, 0}, + fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator::kLT, + fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 15, 50, 0.05, fil::output_t::RAW, 0, 0, - fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::FLOAT_SCALAR, 0}, + fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator::kLT, + fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 19, 50, 0.05, fil::output_t::RAW, 0, 0, - fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::FLOAT_SCALAR, 0}, + fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator::kLT, + fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 19, 50, 0.05, fil::output_t::AVG, 0, 0, - fil::algo_t::BATCH_TREE_REORG, 
42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::INT_CLASS_LABEL, 6}, + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLT, + fil::leaf_value_t::INT_CLASS_LABEL, 6}, {20000, 50, 0.05, 19, 50, 0.05, fil::output_t::RAW, 0, 0, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::FLOAT_SCALAR, 0}, + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLT, + fil::leaf_value_t::FLOAT_SCALAR, 0}, }; TEST_P(TreeliteAutoFilTest, Import) { compare(); } From 7c08ebaec0e8f7bff4d334c7c083d8b79ce0d17b Mon Sep 17 00:00:00 2001 From: Levs Dolgovs Date: Tue, 10 Mar 2020 17:59:57 -0700 Subject: [PATCH 049/330] address review comments --- cpp/include/cuml/fil/fil.h | 17 ++++++------ cpp/src/fil/common.cuh | 7 +++-- cpp/src/fil/fil.cu | 56 ++++++++++++++++++++------------------ cpp/src/fil/infer.cu | 29 ++++++++++---------- cpp/test/sg/fil_test.cu | 46 +++++++++++++++---------------- 5 files changed, 80 insertions(+), 75 deletions(-) diff --git a/cpp/include/cuml/fil/fil.h b/cpp/include/cuml/fil/fil.h index 87eff78da5..bde8427395 100644 --- a/cpp/include/cuml/fil/fil.h +++ b/cpp/include/cuml/fil/fil.h @@ -53,7 +53,7 @@ enum algo_t { * output of the previous stage: * - one of RAW or AVG, indicating how to combine individual tree outputs into the forest output * - optional SIGMOID for applying the sigmoid transform - * - optional CLASS, for thresholding for classification + * - optional CLASS, to output the class label */ enum output_t { /** raw output: the sum of the tree outputs; use for GBM models for @@ -68,8 +68,8 @@ enum output_t { /** sigmoid transformation: apply 1/(1+exp(-x)) to the sum or average of tree outputs; use for GBM binary classification models for probability */ SIGMOID = 0x10, - /** threshold: apply threshold to the output of the previous stage to get the - class (0 or 1) */ + /** output class label: either apply threshold to the output of the previous stage (for binary classification), + or select the class with the most 
votes to get the class label (for multi-class classification). */ CLASS = 0x100, }; @@ -88,9 +88,8 @@ union val_t { /** threshold value for branch node or output value (e.g. class probability or regression summand) for leaf node */ float f; - /** class label or index of the float vector - (vector can be used for class probabilities or regression) */ - unsigned int idx; + /** class label */ + int idx; }; /** dense_node_t is a node in a densely-stored forest */ @@ -113,7 +112,7 @@ struct sparse_node_t : dense_node_t, sparse_node_extra_data { /** leaf_value_t describes what the leaves in a FIL forest store (predict) */ enum leaf_value_t { - /** storing a clas probability or regression summand */ + /** storing a class probability or regression summand */ FLOAT_SCALAR = 0, /** storing a class label */ INT_CLASS_LABEL = 1 @@ -128,7 +127,7 @@ struct leaf_output_t { }; template <> struct leaf_output_t { - typedef unsigned T; + typedef int T; }; /** dense_node_init initializes node from paramters */ @@ -178,7 +177,7 @@ struct forest_params_t { float global_bias; // only used for INT_CLASS_LABEL inference. 
since we're storing the // labels in leaves instead of the whole vector, this keeps track - // of total unique label/class/component count + // of the number of classes int num_classes; }; diff --git a/cpp/src/fil/common.cuh b/cpp/src/fil/common.cuh index cb45109cb2..13851cfcc6 100644 --- a/cpp/src/fil/common.cuh +++ b/cpp/src/fil/common.cuh @@ -40,7 +40,7 @@ __host__ __device__ __forceinline__ int forest_num_nodes(int num_trees, } // FIL_TPB is the number of threads per block to use with FIL kernels -const unsigned long FIL_TPB = 256; +const int FIL_TPB = 256; /** base_node contains common implementation details for dense and sparse nodes */ struct base_node : dense_node_t { @@ -72,7 +72,7 @@ __host__ __device__ __forceinline__ float base_node::output() const { return val.f; } template <> -__host__ __device__ __forceinline__ unsigned base_node::output() +__host__ __device__ __forceinline__ int base_node::output() const { return val.idx; } @@ -159,7 +159,7 @@ struct predict_params { int num_cols; algo_t algo; int max_items; // only set and used by infer() - // number of outputs for the forest for each data instance (sample) (row) + // number of outputs for the forest per each data row int num_outputs; // for class probabilities, this is the number of classes considered // ignored otherwise @@ -170,6 +170,7 @@ struct predict_params { // Data parameters. float* preds; const float* data; + // number of data rows (instances) to predict on size_t num_rows; // Other parameters. 
diff --git a/cpp/src/fil/fil.cu b/cpp/src/fil/fil.cu index 616ff10758..fbd40c75df 100644 --- a/cpp/src/fil/fil.cu +++ b/cpp/src/fil/fil.cu @@ -84,12 +84,13 @@ __global__ void transform_k(float* preds, size_t n, output_t output, if ((output & output_t::AVG) != 0) result *= inv_num_trees; result += global_bias; if ((output & output_t::SIGMOID) != 0) result = sigmoid(result); + // will not be done on INT_CLASS_LABEL because the whole kernel will not run if ((output & output_t::CLASS) != 0) result = result > threshold ? 1.0f : 0.0f; // sklearn outputs numpy array in 'C' order, with the number of classes being last dimension // that is also the default order, so we should use the same one if (complement_proba) { - preds[i] = 1.f - result; + preds[i] = 1.0f - result; preds[i + 1] = result; } else preds[i] = result; @@ -133,14 +134,14 @@ struct forest { params.num_rows = num_rows; params.max_shm = max_shm_; params.num_classes = num_classes_; + /** FLOAT_SCALAR means inference produces 1 class score/component and + transform_k might complement to 2 for classification, + if class probabilities are being requested. + assuming predict(..., predict_proba=true) will not get called + for regression, hence predict_params::num_outputs == 2 */ params.num_outputs = predict_proba ? (leaf_payload_type_ == INT_CLASS_LABEL ? num_classes_ : 2) : 1; - // FLOAT_SCALAR means inference produces 1 class score/component and - // transform_k might complement to 2 for classification, - // if class probabilities are being requested - // assuming predict(..., predict_proba=true) will not get called - // for regression, hence forest::num_classes == 2 params.leaf_payload_type = leaf_payload_type_; // Predict using the forest. 
@@ -160,10 +161,10 @@ struct forest { do_transform = false; if (do_transform) { - unsigned long values_to_transform = - (unsigned long)num_rows * (unsigned long)params.num_outputs; - transform_k<<>>(preds, values_to_transform, ot, + size_t num_values_to_transform = + (size_t)num_rows * (size_t)params.num_outputs; + transform_k<<>>(preds, num_values_to_transform, ot, num_trees_ > 0 ? (1.0f / num_trees_) : 1.0f, threshold_, global_bias_, complement_proba); CUDA_CHECK(cudaPeekAtLastError()); @@ -181,7 +182,6 @@ struct forest { output_t output_ = output_t::RAW; float threshold_ = 0.5; float global_bias_ = 0; - // init to invalid leaf_value_t leaf_payload_type_ = FLOAT_SCALAR; int num_classes_ = 0; }; @@ -399,13 +399,13 @@ int find_class_label_from_one_hot(tl::tl_float* vector, int len) { bool found_label = false; int out; for (int i = 0; i < len; ++i) - if (vector[i] == 1.) { - ASSERT(!found_label, "label vector contains multiple 1.f"); + if (vector[i] == 1.0f) { + ASSERT(!found_label, "label vector contains multiple 1.0f"); out = i; found_label = true; } else - ASSERT(vector[i] == 0., - "label vector contains values other than 0. 
and 1."); + ASSERT(vector[i] == 0.0f, + "label vector contains values other than 0.0 and 1.0"); return out; } @@ -507,6 +507,16 @@ int tree2fil_sparse(std::vector* pnodes, const tl::Tree& tree, return root; } +size_t tl_leaf_vector_size(const tl::Model& model) { + auto tree = model.trees[0]; + int node_key; + for (node_key = tree_root(tree); !tl_node_at(tree, node_key).is_leaf(); + node_key = tl_node_at(tree, node_key).cleft()) + ; + auto vec = tl_node_at(tree, node_key).leaf_vector(); + return vec.size(); +} + // tl2fil_common is the part of conversion from a treelite model // common for dense and sparse forests void tl2fil_common(forest_params_t* params, const tl::Model& model, @@ -516,15 +526,10 @@ void tl2fil_common(forest_params_t* params, const tl::Model& model, params->threshold = tl_params->threshold; // assuming either all leaves use the .leaf_vector() or all leaves use .leaf_value() - auto tree = model.trees[0]; - int node_key; - for (node_key = tree_root(tree); !tl_node_at(tree, node_key).is_leaf(); - node_key = tl_node_at(tree, node_key).cleft()) - ; - auto vec = tl_node_at(tree, node_key).leaf_vector(); - if (vec.size()) { - params->num_classes = vec.size(); - ASSERT(vec.size() == model.num_output_group, "treelite model inconsistent"); + size_t leaf_vec_size = tl_leaf_vector_size(model); + if (leaf_vec_size > 0) { + ASSERT(leaf_vec_size == model.num_output_group, "treelite model inconsistent"); + params->num_classes = leaf_vec_size; params->leaf_payload_type = INT_CLASS_LABEL; } else { params->leaf_payload_type = FLOAT_SCALAR; @@ -538,8 +543,7 @@ void tl2fil_common(forest_params_t* params, const tl::Model& model, params->global_bias = param.global_bias; params->output = output_t::RAW; if (tl_params->output_class) { - if (params->leaf_payload_type == FLOAT_SCALAR) - params->output = output_t(params->output | output_t::CLASS); + params->output = output_t(params->output | output_t::CLASS); } // "random forest" in treelite means tree output averaging if 
(model.random_forest_flag) { diff --git a/cpp/src/fil/infer.cu b/cpp/src/fil/infer.cu index 7e2d3e8bf9..7811e178f5 100644 --- a/cpp/src/fil/infer.cu +++ b/cpp/src/fil/infer.cu @@ -67,7 +67,10 @@ __device__ __forceinline__ vec infer_one_tree( vec out; #pragma unroll for (int j = 0; j < NITEMS; ++j) - out[j] = tree[curr[j]].base_node::output(); + /** dependent names are not considered templates by default, + unless it's a member of a current [template] instantiation. + alternatively, could have used .base_node::output<... */ + out[j] = tree[curr[j]].template output(); return out; } @@ -94,11 +97,11 @@ struct tree_aggregator_t { vec acc; __device__ __forceinline__ tree_aggregator_t(int, void*) {} - __device__ __forceinline__ void accumulate(vec out) { - acc += out; + __device__ __forceinline__ void accumulate(vec single_tree_prediction) { + acc += single_tree_prediction; } __device__ __forceinline__ void finalize(float* out, int num_rows, - char output_stride) { + int output_stride) { __syncthreads(); using BlockReduce = cub::BlockReduce, FIL_TPB>; __shared__ typename BlockReduce::TempStorage tmp_storage; @@ -114,25 +117,23 @@ struct tree_aggregator_t { template struct tree_aggregator_t { - typedef unsigned class_label_t; - typedef unsigned vote_count_t; // could switch to unsigned short to save shared memory // provided atomicAdd(short*) simulated with appropriate shifts - vote_count_t* votes; - class_label_t num_classes; + int* votes; + int num_classes; __device__ __forceinline__ tree_aggregator_t(int num_classes_, void* shared_workspace) - : num_classes(num_classes_), votes((vote_count_t*)shared_workspace) { - for (class_label_t c = threadIdx.x; c < num_classes; c += FIL_TPB * NITEMS) + : num_classes(num_classes_), votes((int*)shared_workspace) { + for (int c = threadIdx.x; c < num_classes; c += FIL_TPB * NITEMS) #pragma unroll for (int item = 0; item < NITEMS; ++item) votes[c * NITEMS + item] = 0; //__syncthreads(); // happening outside already } - __device__ 
__forceinline__ void accumulate(vec out) { + __device__ __forceinline__ void accumulate(vec single_tree_prediction) { #pragma unroll for (int item = 0; item < NITEMS; ++item) - atomicAdd(votes + out[item] * NITEMS + item, 1); + atomicAdd(votes + single_tree_prediction[item] * NITEMS + item, 1); } // class probabilities or regression. for regression, num_classes // is just the number of outputs for each data instance @@ -155,8 +156,8 @@ struct tree_aggregator_t { int item = threadIdx.x; int row = blockIdx.x * NITEMS + item; if (item < NITEMS && row < num_rows) { - vote_count_t max_votes = 0; - class_label_t best_class = 0; + int max_votes = 0; + int best_class = 0; for (int c = 0; c < num_classes; ++c) if (votes[c * NITEMS + item] > max_votes) { max_votes = votes[c * NITEMS + item]; diff --git a/cpp/test/sg/fil_test.cu b/cpp/test/sg/fil_test.cu index fbce0aa4b9..06e20222ad 100644 --- a/cpp/test/sg/fil_test.cu +++ b/cpp/test/sg/fil_test.cu @@ -95,7 +95,7 @@ __global__ void nan_kernel(float* data, const bool* mask, int len, float nan) { float sigmoid(float x) { return 1.0f / (1.0f + expf(-x)); } -typedef std::vector vote_vec; +typedef std::vector vote_vec; vote_vec& operator+=(vote_vec& a, vote_vec b) { ASSERT(a.size() == b.size(), "trying to add two vectors of different size"); for (int i = 0; i < a.size(); ++i) a[i] += b[i]; @@ -180,9 +180,9 @@ class BaseFilTest : public testing::TestWithParam { fil::val_t w; switch (ps.leaf_payload_type) { case fil::leaf_value_t::INT_CLASS_LABEL: - w.idx = (int)((weights_h[i] * .5 + .5) // [0., 1.] 
+ w.idx = (int)((weights_h[i] * 0.5 + 0.5) // [0.0, 1.0] * ps.num_classes + - .5) % + 0.5) % ps.num_classes; // [0..num_classes] break; case fil::leaf_value_t::FLOAT_SCALAR: @@ -227,6 +227,17 @@ class BaseFilTest : public testing::TestWithParam { CUDA_CHECK(cudaFree(mask_d)); } + void transform(float f, float& proba, float& output) { + if ((ps.output & fil::output_t::AVG) != 0) + f *= (1.0f / ps.num_trees); + f += ps.global_bias; + if ((ps.output & fil::output_t::SIGMOID) != 0) f = sigmoid(f); + proba = f; + if ((ps.output & fil::output_t::CLASS) != 0) + f = f > ps.threshold ? 1.0f : 0.0f; + output = f; + } + void predict_on_cpu() { // predict on host std::vector want_preds_h(ps.num_rows); @@ -240,41 +251,30 @@ class BaseFilTest : public testing::TestWithParam { pred += infer_one_tree(&nodes[j * num_nodes], &data_h[i * ps.num_cols]).f; } - if ((ps.output & fil::output_t::AVG) != 0) - pred = pred * (1.f / ps.num_trees); - pred += ps.global_bias; - if ((ps.output & fil::output_t::SIGMOID) != 0) pred = sigmoid(pred); - want_proba_h[i * 2] = 1.f - pred; - want_proba_h[i * 2 + 1] = pred; - - if ((ps.output & fil::output_t::CLASS) != 0) - pred = pred > ps.threshold ? 
1.0f : 0.0f; - want_preds_h[i] = pred; + transform(pred, want_proba_h[i * 2 + 1], want_preds_h[i]); + want_proba_h[i * 2] = 1.0f - want_proba_h[i * 2 + 1]; } break; case fil::leaf_value_t::INT_CLASS_LABEL: - std::vector class_votes(ps.num_classes); + std::vector class_votes(ps.num_classes); for (int r = 0; r < ps.num_rows; ++r) { for (auto& v : class_votes) v = 0; for (int j = 0; j < ps.num_trees; ++j) { - unsigned class_label = + int class_label = infer_one_tree(&nodes[j * num_nodes], &data_h[r * ps.num_cols]) .idx; ++class_votes[class_label]; } - unsigned best_class = 0; - float most_votes = 0.; + int best_class = 0; + float most_votes = 0.0; for (int c = 0; c < ps.num_classes; ++c) { float pred = class_votes[c]; if (pred > most_votes) { most_votes = pred; best_class = c; } - if ((ps.output & fil::output_t::AVG) != 0) - pred = pred * (1.f / ps.num_trees); - pred += ps.global_bias; - if ((ps.output & fil::output_t::SIGMOID) != 0) pred = sigmoid(pred); - want_proba_h[r * ps.num_classes + c] = pred; + float _; + transform(pred, want_proba_h[r * ps.num_classes + c], _); } want_preds_h[r] = best_class; } @@ -462,7 +462,7 @@ class TreeliteFilTest : public BaseFilTest { break; case fil::leaf_value_t::INT_CLASS_LABEL: std::vector vec(ps.num_classes); - vec[output.idx] = 1.; + vec[output.idx] = 1.0; TL_CPP_CHECK(builder->SetLeafVectorNode(key, vec)); } } else { From 04edf26206096c812502d2eb5b29b3d06d60f014 Mon Sep 17 00:00:00 2001 From: Levs Dolgovs Date: Wed, 11 Mar 2020 22:58:52 -0700 Subject: [PATCH 050/330] accounting for cub::BlockReduce::TempStorage when computing max items; address review comments --- cpp/src/fil/infer.cu | 86 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 69 insertions(+), 17 deletions(-) diff --git a/cpp/src/fil/infer.cu b/cpp/src/fil/infer.cu index 7811e178f5..82f8e9ab53 100644 --- a/cpp/src/fil/infer.cu +++ b/cpp/src/fil/infer.cu @@ -94,18 +94,34 @@ __device__ __forceinline__ vec<1, output_type> infer_one_tree(tree_type tree, 
template // = FLOAT_SCALAR struct tree_aggregator_t { + static const int ptx_arch = 750; + typedef cub::BlockReduce, FIL_TPB, + cub::BLOCK_REDUCE_WARP_REDUCTIONS, 1, 1, ptx_arch> BlockReduce; + typedef typename BlockReduce::TempStorage TempStorage; + vec acc; + TempStorage* tmp_storage; - __device__ __forceinline__ tree_aggregator_t(int, void*) {} + static size_t smem_finalize_footprint(int) { + /** To compute accurately, would need to know the latest __CUDA_ARCH__ + for which the code is compiled and which fits the SM being run on. + This is an approximation */ + return sizeof (TempStorage); + } + static size_t smem_accumulate_footprint(int) { + return 0; + } + + __device__ __forceinline__ tree_aggregator_t(int, void* shared_workspace): + tmp_storage((TempStorage*)shared_workspace) {} __device__ __forceinline__ void accumulate(vec single_tree_prediction) { acc += single_tree_prediction; } __device__ __forceinline__ void finalize(float* out, int num_rows, int output_stride) { __syncthreads(); - using BlockReduce = cub::BlockReduce, FIL_TPB>; - __shared__ typename BlockReduce::TempStorage tmp_storage; - acc = BlockReduce(tmp_storage).Sum(acc); + new(tmp_storage) TempStorage; + acc = BlockReduce(*tmp_storage).Sum(acc); if (threadIdx.x == 0) { for (int i = 0; i < NITEMS; ++i) { int row = blockIdx.x * NITEMS + i; @@ -122,6 +138,13 @@ struct tree_aggregator_t { int* votes; int num_classes; + static size_t smem_finalize_footprint(int num_classes) { + return sizeof(int) * num_classes * NITEMS; + } + static size_t smem_accumulate_footprint(int num_classes) { + return smem_finalize_footprint(num_classes); + } + __device__ __forceinline__ tree_aggregator_t(int num_classes_, void* shared_workspace) : num_classes(num_classes_), votes((int*)shared_workspace) { @@ -204,6 +227,18 @@ __global__ void infer_k(storage_type forest, predict_params params) { acc.finalize(params.preds, params.num_rows, params.num_outputs); } +template +size_t get_smem_footprint(predict_params params) { + 
size_t finalize_footprint = + tree_aggregator_t::smem_finalize_footprint + (params.num_classes); + size_t accumulate_footprint = sizeof(float) * params.num_cols * NITEMS + + tree_aggregator_t::smem_accumulate_footprint + (params.num_classes); + + return std::max(accumulate_footprint, finalize_footprint); +} + template void infer_k_launcher(storage_type forest, predict_params params, cudaStream_t stream) { @@ -211,14 +246,36 @@ void infer_k_launcher(storage_type forest, predict_params params, params.max_items = params.algo == algo_t::BATCH_TREE_REORG ? MAX_BATCH_ITEMS : 1; - int shared_mem_per_item = - sizeof(float) * params.num_cols + - // class vote histogram, while inferring trees - (leaf_payload_type == INT_CLASS_LABEL ? sizeof(int) * params.num_classes - : 0); - // CUB workspace should fit itself, and we don't need - // the row by the time CUB is used - int num_items = params.max_shm / shared_mem_per_item; + int num_items = 0; + size_t shm_sz = 0; + // solving this linear programming problem in a single equation + // would be obscure + for(int nitems=1; + (nitems <= MAX_BATCH_ITEMS) && (nitems <= params.max_items); + ++nitems) { + size_t peak_footprint; + switch(nitems) { + case 1: + peak_footprint = get_smem_footprint<1, leaf_payload_type>(params); + break; + case 2: + peak_footprint = get_smem_footprint<2, leaf_payload_type>(params); + break; + case 3: + peak_footprint = get_smem_footprint<3, leaf_payload_type>(params); + break; + case 4: + peak_footprint = get_smem_footprint<4, leaf_payload_type>(params); + break; + default: + ASSERT(false, "internal error: nitems > 4"); + } + // for data row + if (peak_footprint <= params.max_shm) { + num_items = nitems; + shm_sz = peak_footprint; + } + } if (num_items == 0) { int max_cols = params.max_shm / sizeof(float); ASSERT(false, "p.num_cols == %d: too many features, only %d allowed%s", @@ -227,9 +284,7 @@ void infer_k_launcher(storage_type forest, predict_params params, ? 
" (accounting for shared class vote histogram)" : ""); } - num_items = std::min(num_items, params.max_items); int num_blocks = ceildiv(int(params.num_rows), num_items); - int shm_sz = num_items * shared_mem_per_item; switch (num_items) { case 1: infer_k<1, leaf_payload_type> @@ -257,8 +312,6 @@ template void infer(storage_type forest, predict_params params, cudaStream_t stream) { switch (params.leaf_payload_type) { case FLOAT_SCALAR: - ASSERT(params.num_outputs <= 2, - "wrong leaf payload for multi-class (>2) inference"); infer_k_launcher(forest, params, stream); break; case INT_CLASS_LABEL: @@ -276,4 +329,3 @@ template void infer(sparse_storage forest, } // namespace fil } // namespace ML -#undef __forceinline__ From c6078ddd64a7afb5f26fd48c38b16bdead6eab5e Mon Sep 17 00:00:00 2001 From: Levs Dolgovs Date: Thu, 12 Mar 2020 02:45:39 -0700 Subject: [PATCH 051/330] fixed a bad_alloc due to tl::Tree copy; shared memory offsets --- cpp/src/fil/fil.cu | 10 ++++++---- cpp/src/fil/infer.cu | 23 +++++++++++++---------- 2 files changed, 19 insertions(+), 14 deletions(-) diff --git a/cpp/src/fil/fil.cu b/cpp/src/fil/fil.cu index fbd40c75df..1f1a8921d0 100644 --- a/cpp/src/fil/fil.cu +++ b/cpp/src/fil/fil.cu @@ -508,13 +508,15 @@ int tree2fil_sparse(std::vector* pnodes, const tl::Tree& tree, } size_t tl_leaf_vector_size(const tl::Model& model) { - auto tree = model.trees[0]; + const tl::Tree& tree = model.trees[0]; int node_key; for (node_key = tree_root(tree); !tl_node_at(tree, node_key).is_leaf(); - node_key = tl_node_at(tree, node_key).cleft()) + node_key = tl_node_at(tree, node_key).cright()) ; - auto vec = tl_node_at(tree, node_key).leaf_vector(); - return vec.size(); + const tl::Tree::Node& node = tl_node_at(tree, node_key); + if(node.has_leaf_vector()) + return node.leaf_vector().size(); + return 0; } // tl2fil_common is the part of conversion from a treelite model diff --git a/cpp/src/fil/infer.cu b/cpp/src/fil/infer.cu index 82f8e9ab53..f335873291 100644 --- 
a/cpp/src/fil/infer.cu +++ b/cpp/src/fil/infer.cu @@ -112,7 +112,7 @@ struct tree_aggregator_t { return 0; } - __device__ __forceinline__ tree_aggregator_t(int, void* shared_workspace): + __device__ __forceinline__ tree_aggregator_t(int, void* shared_workspace, size_t): tmp_storage((TempStorage*)shared_workspace) {} __device__ __forceinline__ void accumulate(vec single_tree_prediction) { acc += single_tree_prediction; @@ -146,8 +146,10 @@ struct tree_aggregator_t { } __device__ __forceinline__ tree_aggregator_t(int num_classes_, - void* shared_workspace) - : num_classes(num_classes_), votes((int*)shared_workspace) { + void* shared_workspace, + size_t data_row_size) + : num_classes(num_classes_), + votes((int*)(data_row_size + (char*)shared_workspace)) { for (int c = threadIdx.x; c < num_classes; c += FIL_TPB * NITEMS) #pragma unroll for (int item = 0; item < NITEMS; ++item) votes[c * NITEMS + item] = 0; @@ -214,7 +216,7 @@ __global__ void infer_k(storage_type forest, predict_params params) { } tree_aggregator_t acc( - params.num_classes, sdata + params.num_cols * NITEMS); + params.num_classes, sdata, params.num_cols * NITEMS * sizeof(float)); __syncthreads(); // for both row cache init and acc init @@ -249,10 +251,8 @@ void infer_k_launcher(storage_type forest, predict_params params, int num_items = 0; size_t shm_sz = 0; // solving this linear programming problem in a single equation - // would be obscure - for(int nitems=1; - (nitems <= MAX_BATCH_ITEMS) && (nitems <= params.max_items); - ++nitems) { + // looks less tractable than this + for(int nitems=1; nitems <= params.max_items; ++nitems) { size_t peak_footprint; switch(nitems) { case 1: @@ -277,9 +277,12 @@ void infer_k_launcher(storage_type forest, predict_params params, } } if (num_items == 0) { - int max_cols = params.max_shm / sizeof(float); + int real_num_cols = params.num_cols; + // since we're crashing, this will not take too long + while(get_smem_footprint<1, leaf_payload_type>(params) > 
params.max_shm) + --params.num_cols; ASSERT(false, "p.num_cols == %d: too many features, only %d allowed%s", - params.num_cols, max_cols, + real_num_cols, params.num_cols, leaf_payload_type == INT_CLASS_LABEL ? " (accounting for shared class vote histogram)" : ""); From b57cb6d4881ef5cddf8fb39d935f21d40cce7a7c Mon Sep 17 00:00:00 2001 From: Levs Dolgovs Date: Thu, 12 Mar 2020 03:52:40 -0700 Subject: [PATCH 052/330] fix style --- cpp/include/cuml/fil/fil.h | 3 +++ cpp/src/fil/common.cuh | 3 +-- cpp/src/fil/fil.cu | 22 ++++++++--------- cpp/src/fil/infer.cu | 49 ++++++++++++++++++++------------------ cpp/test/sg/fil_test.cu | 15 ++++++------ 5 files changed, 49 insertions(+), 43 deletions(-) diff --git a/cpp/include/cuml/fil/fil.h b/cpp/include/cuml/fil/fil.h index bde8427395..ffdc2833bc 100644 --- a/cpp/include/cuml/fil/fil.h +++ b/cpp/include/cuml/fil/fil.h @@ -98,6 +98,9 @@ struct dense_node_t { int bits; }; +/** sparse_node_extra_data is what's missing from a dense node to store + a sparse node, that is, extra indexing information due to compressing + a sparse tree. 
*/ struct sparse_node_extra_data { int left_idx; int dummy; // make alignment explicit and reserve for future use diff --git a/cpp/src/fil/common.cuh b/cpp/src/fil/common.cuh index 13851cfcc6..000d028b9a 100644 --- a/cpp/src/fil/common.cuh +++ b/cpp/src/fil/common.cuh @@ -72,8 +72,7 @@ __host__ __device__ __forceinline__ float base_node::output() const { return val.f; } template <> -__host__ __device__ __forceinline__ int base_node::output() - const { +__host__ __device__ __forceinline__ int base_node::output() const { return val.idx; } diff --git a/cpp/src/fil/fil.cu b/cpp/src/fil/fil.cu index 1f1a8921d0..557d1586a9 100644 --- a/cpp/src/fil/fil.cu +++ b/cpp/src/fil/fil.cu @@ -41,8 +41,8 @@ void dense_node_init(dense_node_t* n, val_t output, float thresh, int fid, *n = dense_node(output, thresh, fid, def_left, is_leaf); } -void dense_node_decode(const dense_node_t* n, union val_t* output, - float* thresh, int* fid, bool* def_left, bool* is_leaf) { +void dense_node_decode(const dense_node_t* n, val_t* output, float* thresh, + int* fid, bool* def_left, bool* is_leaf) { dense_node dn(*n); *output = dn.output(); *thresh = dn.thresh(); @@ -59,8 +59,8 @@ void sparse_node_init(sparse_node_t* node, val_t output, float thresh, int fid, } /** sparse_node_decode extracts individual members from node */ -void sparse_node_decode(const sparse_node_t* node, union val_t* output, - float* thresh, int* fid, bool* def_left, bool* is_leaf, +void sparse_node_decode(const sparse_node_t* node, val_t* output, float* thresh, + int* fid, bool* def_left, bool* is_leaf, int* left_index) { dense_node_decode(node, output, thresh, fid, def_left, is_leaf); *left_index = sparse_node(*node).left_index(); @@ -163,10 +163,10 @@ struct forest { if (do_transform) { size_t num_values_to_transform = (size_t)num_rows * (size_t)params.num_outputs; - transform_k<<>>(preds, num_values_to_transform, ot, - num_trees_ > 0 ? 
(1.0f / num_trees_) : 1.0f, - threshold_, global_bias_, complement_proba); + transform_k<<>>(preds, num_values_to_transform, ot, + num_trees_ > 0 ? (1.0f / num_trees_) : 1.0f, + threshold_, global_bias_, complement_proba); CUDA_CHECK(cudaPeekAtLastError()); } } @@ -514,8 +514,7 @@ size_t tl_leaf_vector_size(const tl::Model& model) { node_key = tl_node_at(tree, node_key).cright()) ; const tl::Tree::Node& node = tl_node_at(tree, node_key); - if(node.has_leaf_vector()) - return node.leaf_vector().size(); + if (node.has_leaf_vector()) return node.leaf_vector().size(); return 0; } @@ -530,7 +529,8 @@ void tl2fil_common(forest_params_t* params, const tl::Model& model, // assuming either all leaves use the .leaf_vector() or all leaves use .leaf_value() size_t leaf_vec_size = tl_leaf_vector_size(model); if (leaf_vec_size > 0) { - ASSERT(leaf_vec_size == model.num_output_group, "treelite model inconsistent"); + ASSERT(leaf_vec_size == model.num_output_group, + "treelite model inconsistent"); params->num_classes = leaf_vec_size; params->leaf_payload_type = INT_CLASS_LABEL; } else { diff --git a/cpp/src/fil/infer.cu b/cpp/src/fil/infer.cu index f335873291..0a32baa42e 100644 --- a/cpp/src/fil/infer.cu +++ b/cpp/src/fil/infer.cu @@ -96,9 +96,10 @@ template , FIL_TPB, - cub::BLOCK_REDUCE_WARP_REDUCTIONS, 1, 1, ptx_arch> BlockReduce; + cub::BLOCK_REDUCE_WARP_REDUCTIONS, 1, 1, ptx_arch> + BlockReduce; typedef typename BlockReduce::TempStorage TempStorage; - + vec acc; TempStorage* tmp_storage; @@ -106,21 +107,21 @@ struct tree_aggregator_t { /** To compute accurately, would need to know the latest __CUDA_ARCH__ for which the code is compiled and which fits the SM being run on. 
This is an approximation */ - return sizeof (TempStorage); - } - static size_t smem_accumulate_footprint(int) { - return 0; + return sizeof(TempStorage); } - - __device__ __forceinline__ tree_aggregator_t(int, void* shared_workspace, size_t): - tmp_storage((TempStorage*)shared_workspace) {} - __device__ __forceinline__ void accumulate(vec single_tree_prediction) { + static size_t smem_accumulate_footprint(int) { return 0; } + + __device__ __forceinline__ tree_aggregator_t(int, void* shared_workspace, + size_t) + : tmp_storage((TempStorage*)shared_workspace) {} + __device__ __forceinline__ void accumulate( + vec single_tree_prediction) { acc += single_tree_prediction; } __device__ __forceinline__ void finalize(float* out, int num_rows, int output_stride) { __syncthreads(); - new(tmp_storage) TempStorage; + new (tmp_storage) TempStorage; acc = BlockReduce(*tmp_storage).Sum(acc); if (threadIdx.x == 0) { for (int i = 0; i < NITEMS; ++i) { @@ -155,7 +156,8 @@ struct tree_aggregator_t { for (int item = 0; item < NITEMS; ++item) votes[c * NITEMS + item] = 0; //__syncthreads(); // happening outside already } - __device__ __forceinline__ void accumulate(vec single_tree_prediction) { + __device__ __forceinline__ void accumulate( + vec single_tree_prediction) { #pragma unroll for (int item = 0; item < NITEMS; ++item) atomicAdd(votes + single_tree_prediction[item] * NITEMS + item, 1); @@ -231,14 +233,15 @@ __global__ void infer_k(storage_type forest, predict_params params) { template size_t get_smem_footprint(predict_params params) { - size_t finalize_footprint = - tree_aggregator_t::smem_finalize_footprint - (params.num_classes); - size_t accumulate_footprint = sizeof(float) * params.num_cols * NITEMS + - tree_aggregator_t::smem_accumulate_footprint - (params.num_classes); - - return std::max(accumulate_footprint, finalize_footprint); + size_t finalize_footprint = + tree_aggregator_t::smem_finalize_footprint( + params.num_classes); + size_t accumulate_footprint = + 
sizeof(float) * params.num_cols * NITEMS + + tree_aggregator_t::smem_accumulate_footprint( + params.num_classes); + + return std::max(accumulate_footprint, finalize_footprint); } template @@ -252,9 +255,9 @@ void infer_k_launcher(storage_type forest, predict_params params, size_t shm_sz = 0; // solving this linear programming problem in a single equation // looks less tractable than this - for(int nitems=1; nitems <= params.max_items; ++nitems) { + for (int nitems = 1; nitems <= params.max_items; ++nitems) { size_t peak_footprint; - switch(nitems) { + switch (nitems) { case 1: peak_footprint = get_smem_footprint<1, leaf_payload_type>(params); break; @@ -279,7 +282,7 @@ void infer_k_launcher(storage_type forest, predict_params params, if (num_items == 0) { int real_num_cols = params.num_cols; // since we're crashing, this will not take too long - while(get_smem_footprint<1, leaf_payload_type>(params) > params.max_shm) + while (get_smem_footprint<1, leaf_payload_type>(params) > params.max_shm) --params.num_cols; ASSERT(false, "p.num_cols == %d: too many features, only %d allowed%s", real_num_cols, params.num_cols, diff --git a/cpp/test/sg/fil_test.cu b/cpp/test/sg/fil_test.cu index 06e20222ad..b9da0298e5 100644 --- a/cpp/test/sg/fil_test.cu +++ b/cpp/test/sg/fil_test.cu @@ -63,6 +63,8 @@ struct FilTestParams { // num_classes must be >1 when INT_CLASS_LABEL == leaf_payload_type // it's used in treelite ModelBuilder initialization int num_classes; + + size_t max_outputs_per_row() { return std::max(num_classes, 2); } }; std::string output2str(fil::output_t output) { @@ -228,8 +230,7 @@ class BaseFilTest : public testing::TestWithParam { } void transform(float f, float& proba, float& output) { - if ((ps.output & fil::output_t::AVG) != 0) - f *= (1.0f / ps.num_trees); + if ((ps.output & fil::output_t::AVG) != 0) f *= (1.0f / ps.num_trees); f += ps.global_bias; if ((ps.output & fil::output_t::SIGMOID) != 0) f = sigmoid(f); proba = f; @@ -241,7 +242,7 @@ class BaseFilTest : 
public testing::TestWithParam { void predict_on_cpu() { // predict on host std::vector want_preds_h(ps.num_rows); - std::vector want_proba_h(ps.num_rows * std::max(ps.num_classes, 2)); + std::vector want_proba_h(ps.num_rows * ps.max_outputs_per_row()); int num_nodes = tree_num_nodes(); switch (ps.leaf_payload_type) { case fil::leaf_value_t::FLOAT_SCALAR: @@ -284,9 +285,9 @@ class BaseFilTest : public testing::TestWithParam { // copy to GPU allocate(want_preds_d, ps.num_rows); updateDevice(want_preds_d, want_preds_h.data(), ps.num_rows, stream); - allocate(want_proba_d, ps.num_rows * std::max(ps.num_classes, 2)); + allocate(want_proba_d, ps.num_rows * ps.max_outputs_per_row()); updateDevice(want_proba_d, want_proba_h.data(), - ps.num_rows * std::max(ps.num_classes, 2), stream); + ps.num_rows * ps.max_outputs_per_row(), stream); CUDA_CHECK(cudaStreamSynchronize(stream)); } @@ -299,7 +300,7 @@ class BaseFilTest : public testing::TestWithParam { // predict allocate(preds_d, ps.num_rows); fil::predict(handle, forest, preds_d, data_d, ps.num_rows); - allocate(proba_d, ps.num_rows * std::max(ps.num_classes, 2)); + allocate(proba_d, ps.num_rows * ps.max_outputs_per_row()); fil::predict(handle, forest, proba_d, data_d, ps.num_rows, true); CUDA_CHECK(cudaStreamSynchronize(stream)); @@ -309,7 +310,7 @@ class BaseFilTest : public testing::TestWithParam { void compare() { ASSERT_TRUE(devArrMatch(want_proba_d, proba_d, - ps.num_rows * std::max(ps.num_classes, 2), + ps.num_rows * ps.max_outputs_per_row(), CompareApprox(ps.tolerance), stream)); float tolerance = ps.leaf_payload_type == fil::leaf_value_t::FLOAT_SCALAR ? 
ps.tolerance From 0a1c575faceadd9d4caf2feeecf12b826189972d Mon Sep 17 00:00:00 2001 From: Levs Dolgovs Date: Fri, 13 Mar 2020 23:50:38 -0700 Subject: [PATCH 053/330] addressed review comments; build fails later --- cpp/test/sg/fil_test.cu | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/cpp/test/sg/fil_test.cu b/cpp/test/sg/fil_test.cu index b9da0298e5..2e9af25e71 100644 --- a/cpp/test/sg/fil_test.cu +++ b/cpp/test/sg/fil_test.cu @@ -97,13 +97,6 @@ __global__ void nan_kernel(float* data, const bool* mask, int len, float nan) { float sigmoid(float x) { return 1.0f / (1.0f + expf(-x)); } -typedef std::vector vote_vec; -vote_vec& operator+=(vote_vec& a, vote_vec b) { - ASSERT(a.size() == b.size(), "trying to add two vectors of different size"); - for (int i = 0; i < a.size(); ++i) a[i] += b[i]; - return a; -} - class BaseFilTest : public testing::TestWithParam { protected: void SetUp() override { @@ -147,7 +140,12 @@ class BaseFilTest : public testing::TestWithParam { // generate on-GPU random data Random::Rng r(ps.seed); - r.uniform(weights_d, num_nodes, -1.0f, 1.0f, stream); + if (ps.leaf_payload_type == fil::leaf_value_t::FLOAT_SCALAR) + r.uniform(weights_d, num_nodes, -1.0f, 1.0f, stream); + else + r.uniform(weights_d, num_nodes, 0.0f, + // [0..num_classes + 1) + std::nextafterf(ps.num_classes + 1, 0.0f), stream); r.uniform(thresholds_d, num_nodes, -1.0f, 1.0f, stream); r.uniformInt(fids_d, num_nodes, 0, ps.num_cols, stream); r.bernoulli(def_lefts_d, num_nodes, 0.5f, stream); From a0cd47611a781230e327c49e07a944082172a58d Mon Sep 17 00:00:00 2001 From: Levs Dolgovs Date: Thu, 19 Mar 2020 18:04:31 -0700 Subject: [PATCH 054/330] changelog, fix a merge --- CHANGELOG.md | 2 +- cpp/src/fil/infer.cu | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c51af83327..f25db1d637 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,7 +1,7 @@ # cuML 0.14.0 (Date TBD) ## New Features -- PR #1757: Add 
multi class inference in FIL for forests from cuML RF +- PR #1902: Multi class inference in FIL C++ and importing multi-class forests from treelite ## Improvements diff --git a/cpp/src/fil/infer.cu b/cpp/src/fil/infer.cu index 0a32baa42e..bca7c39643 100644 --- a/cpp/src/fil/infer.cu +++ b/cpp/src/fil/infer.cu @@ -222,7 +222,6 @@ __global__ void infer_k(storage_type forest, predict_params params) { __syncthreads(); // for both row cache init and acc init - AggregateTrees acc(params.num_output_classes, nullptr); // one block works on NITEMS rows and the whole forest for (int j = threadIdx.x; j < forest.num_trees(); j += blockDim.x) { acc.accumulate(infer_one_tree::T>( From a291f2e06828dc4882ea86484c2a8d81aa20e78d Mon Sep 17 00:00:00 2001 From: Levs Dolgovs Date: Thu, 19 Mar 2020 13:50:21 -0700 Subject: [PATCH 055/330] CI WAR until 0.14 is fully supported --- ci/gpu/build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index 308745b6bd..dc1046da32 100644 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -48,7 +48,7 @@ conda install -c conda-forge -c rapidsai -c rapidsai-nightly -c rapidsai/label/x "cudf=${MINOR_VERSION}" \ "rmm=${MINOR_VERSION}" \ "nvstrings=${MINOR_VERSION}" \ - "libcumlprims=${MINOR_VERSION}" \ + "libcumlprims=0.13" \ "lapack" \ "cmake==3.14.3" \ "umap-learn" \ From eea73d34827ae16b69644da827636ae08bcea7b4 Mon Sep 17 00:00:00 2001 From: Levs Dolgovs Date: Thu, 19 Mar 2020 19:19:48 -0700 Subject: [PATCH 056/330] forgot a patch --- cpp/include/cuml/fil/fil.h | 2 +- cpp/src/fil/fil.cu | 12 ++++++++++++ cpp/test/sg/fil_test.cu | 11 ++++------- 3 files changed, 17 insertions(+), 8 deletions(-) diff --git a/cpp/include/cuml/fil/fil.h b/cpp/include/cuml/fil/fil.h index ffdc2833bc..3ea63d8a52 100644 --- a/cpp/include/cuml/fil/fil.h +++ b/cpp/include/cuml/fil/fil.h @@ -172,7 +172,7 @@ struct forest_params_t { algo_t algo; // output is the desired output type output_t output; - // threshold is used to for 
classification if output == OUTPUT_CLASS, + // threshold is used to for classification if (output & OUTPUT_CLASS), // and is ignored otherwise float threshold; // global_bias is added to the sum of tree predictions diff --git a/cpp/src/fil/fil.cu b/cpp/src/fil/fil.cu index 557d1586a9..e837067e98 100644 --- a/cpp/src/fil/fil.cu +++ b/cpp/src/fil/fil.cu @@ -300,7 +300,15 @@ void check_params(const forest_params_t* params, bool dense) { } switch (params->leaf_payload_type) { case leaf_value_t::FLOAT_SCALAR: + /* params->num_classes is ignored in this case, since the user might call + predict_proba() on regression. Hence, no point checking the range of + an ignored variable */ + break; case leaf_value_t::INT_CLASS_LABEL: + ASSERT(params->num_classes != 1, "trees will always predict class 0"); + ASSERT(params->num_classes > 0, + "num_classes is not ignored for " + "leaf_payload_type == INT_CLASS_LABEL"); break; default: ASSERT(false, @@ -406,6 +414,7 @@ int find_class_label_from_one_hot(tl::tl_float* vector, int len) { } else ASSERT(vector[i] == 0.0f, "label vector contains values other than 0.0 and 1.0"); + ASSERT(found_label, "did not find 1.0f in vector"); return out; } @@ -418,6 +427,9 @@ void tl2fil_leaf_payload(fil_node_t* fil_node, const tl::Tree::Node& tl_node, ASSERT(vec.size() == forest_params.num_classes, "inconsistent number of classes in treelite leaves"); fil_node->val.idx = find_class_label_from_one_hot(&vec[0], vec.size()); + assert(fil_node->val.idx > 0); + assert(fil_node->val.idx < forest_params.num_classes); + assert(forest_params.num_classes == 6); break; case FLOAT_SCALAR: fil_node->val.f = tl_node.leaf_value(); diff --git a/cpp/test/sg/fil_test.cu b/cpp/test/sg/fil_test.cu index 2e9af25e71..b46859eb07 100644 --- a/cpp/test/sg/fil_test.cu +++ b/cpp/test/sg/fil_test.cu @@ -144,8 +144,8 @@ class BaseFilTest : public testing::TestWithParam { r.uniform(weights_d, num_nodes, -1.0f, 1.0f, stream); else r.uniform(weights_d, num_nodes, 0.0f, - // 
[0..num_classes + 1) - std::nextafterf(ps.num_classes + 1, 0.0f), stream); + // [0..num_classes) + std::nextafterf(ps.num_classes, 0.0f), stream); r.uniform(thresholds_d, num_nodes, -1.0f, 1.0f, stream); r.uniformInt(fids_d, num_nodes, 0, ps.num_cols, stream); r.bernoulli(def_lefts_d, num_nodes, 0.5f, stream); @@ -180,10 +180,7 @@ class BaseFilTest : public testing::TestWithParam { fil::val_t w; switch (ps.leaf_payload_type) { case fil::leaf_value_t::INT_CLASS_LABEL: - w.idx = (int)((weights_h[i] * 0.5 + 0.5) // [0.0, 1.0] - * ps.num_classes + - 0.5) % - ps.num_classes; // [0..num_classes] + w.idx = int(weights_h[i]); break; case fil::leaf_value_t::FLOAT_SCALAR: w.f = weights_h[i]; @@ -461,7 +458,7 @@ class TreeliteFilTest : public BaseFilTest { break; case fil::leaf_value_t::INT_CLASS_LABEL: std::vector vec(ps.num_classes); - vec[output.idx] = 1.0; + for (int i = 0; i < ps.num_classes; ++i) vec[i] = i == output.idx; TL_CPP_CHECK(builder->SetLeafVectorNode(key, vec)); } } else { From 3723cb5422c7efd5b310541c98307feea2370782 Mon Sep 17 00:00:00 2001 From: Levs Dolgovs Date: Thu, 19 Mar 2020 19:25:59 -0700 Subject: [PATCH 057/330] . 
--- cpp/src/fil/fil.cu | 3 --- cpp/src/fil/infer.cu | 6 +++--- python/cuml/test/test_fil.py | 2 +- 3 files changed, 4 insertions(+), 7 deletions(-) diff --git a/cpp/src/fil/fil.cu b/cpp/src/fil/fil.cu index e837067e98..4e67e95f45 100644 --- a/cpp/src/fil/fil.cu +++ b/cpp/src/fil/fil.cu @@ -427,9 +427,6 @@ void tl2fil_leaf_payload(fil_node_t* fil_node, const tl::Tree::Node& tl_node, ASSERT(vec.size() == forest_params.num_classes, "inconsistent number of classes in treelite leaves"); fil_node->val.idx = find_class_label_from_one_hot(&vec[0], vec.size()); - assert(fil_node->val.idx > 0); - assert(fil_node->val.idx < forest_params.num_classes); - assert(forest_params.num_classes == 6); break; case FLOAT_SCALAR: fil_node->val.f = tl_node.leaf_value(); diff --git a/cpp/src/fil/infer.cu b/cpp/src/fil/infer.cu index bca7c39643..6e4330982d 100644 --- a/cpp/src/fil/infer.cu +++ b/cpp/src/fil/infer.cu @@ -94,6 +94,9 @@ __device__ __forceinline__ vec<1, output_type> infer_one_tree(tree_type tree, template // = FLOAT_SCALAR struct tree_aggregator_t { + /** To compute accurately, would need to know the latest __CUDA_ARCH__ + for which the code is compiled and which fits the SM being run on. + This is an approximation */ static const int ptx_arch = 750; typedef cub::BlockReduce, FIL_TPB, cub::BLOCK_REDUCE_WARP_REDUCTIONS, 1, 1, ptx_arch> @@ -104,9 +107,6 @@ struct tree_aggregator_t { TempStorage* tmp_storage; static size_t smem_finalize_footprint(int) { - /** To compute accurately, would need to know the latest __CUDA_ARCH__ - for which the code is compiled and which fits the SM being run on. 
- This is an approximation */ return sizeof(TempStorage); } static size_t smem_accumulate_footprint(int) { return 0; } diff --git a/python/cuml/test/test_fil.py b/python/cuml/test/test_fil.py index 657db930df..e688604bab 100644 --- a/python/cuml/test/test_fil.py +++ b/python/cuml/test/test_fil.py @@ -303,7 +303,7 @@ def test_fil_skl_regression(n_rows, n_columns, n_estimators, max_depth, fil_mse = mean_squared_error(y_validation, fil_preds) # if fil is better than skl, no need to fail the test - assert fil_mse <= skl_mse + 1e-4 + assert fil_mse <= skl_mse * (1. + 1e-7) + 1e-4 assert array_equal(fil_preds, skl_preds) From 13f5bac04a060048230b284bce28e6f401b043a4 Mon Sep 17 00:00:00 2001 From: Levs Dolgovs Date: Thu, 19 Mar 2020 19:42:52 -0700 Subject: [PATCH 058/330] fix formatting --- cpp/src/fil/infer.cu | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/cpp/src/fil/infer.cu b/cpp/src/fil/infer.cu index 6e4330982d..2c7935eb47 100644 --- a/cpp/src/fil/infer.cu +++ b/cpp/src/fil/infer.cu @@ -106,9 +106,7 @@ struct tree_aggregator_t { vec acc; TempStorage* tmp_storage; - static size_t smem_finalize_footprint(int) { - return sizeof(TempStorage); - } + static size_t smem_finalize_footprint(int) { return sizeof(TempStorage); } static size_t smem_accumulate_footprint(int) { return 0; } __device__ __forceinline__ tree_aggregator_t(int, void* shared_workspace, From 72040bdfa17fdb4800d3f32415287cb344bf1960 Mon Sep 17 00:00:00 2001 From: Simon Andersen Date: Fri, 20 Mar 2020 11:33:56 -0500 Subject: [PATCH 059/330] Add tests for unknown categories encountered in OneHotEncoding --- python/cuml/preprocessing/encoders.py | 2 +- python/cuml/test/test_one_hot_encoder.py | 31 +++++++++++++++++++++++- 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/python/cuml/preprocessing/encoders.py b/python/cuml/preprocessing/encoders.py index 7e4795a888..5d0c041e6b 100644 --- a/python/cuml/preprocessing/encoders.py +++ b/python/cuml/preprocessing/encoders.py @@ 
-130,7 +130,7 @@ def fit(self, X): if not X[feature].isin(self.categories[feature]).all(): msg = ("Found unknown categories in column {0}" " during fit".format(feature)) - raise ValueError(msg) + raise KeyError(msg) # self.drop_idx_ = self._compute_drop_idx() self._fitted = True diff --git a/python/cuml/test/test_one_hot_encoder.py b/python/cuml/test/test_one_hot_encoder.py index 78666389a2..1fdc4d519e 100644 --- a/python/cuml/test/test_one_hot_encoder.py +++ b/python/cuml/test/test_one_hot_encoder.py @@ -62,8 +62,37 @@ def test_onehot_fit_handle_unknown(): Y = DataFrame({'chars': ['c', 'b'], 'int': [0, 2]}) enc = OneHotEncoder(handle_unknown='error', categories=Y) - with pytest.raises(ValueError): + with pytest.raises(KeyError): enc.fit(X) enc = OneHotEncoder(handle_unknown='ignore', categories=Y) enc.fit(X) + + +def test_onehot_transform_handle_unknown(): + X = DataFrame({'chars': ['a', 'b'], 'int': [0, 2]}) + Y = DataFrame({'chars': ['c', 'b'], 'int': [0, 2]}) + + enc = OneHotEncoder(handle_unknown='error') + enc = enc.fit(X) + with pytest.raises(KeyError): + enc.transform(Y) + + enc = OneHotEncoder(handle_unknown='ignore') + enc = enc.fit(X) + ohe = enc.transform(Y) + ref = cp.array([[0., 0., 1., 0.], + [0., 1., 0., 1.]]) + cp.testing.assert_array_equal(ohe, ref) + + +def test_onehot_inverse_transform_handle_unknown(): + X = DataFrame({'chars': ['a', 'b'], 'int': [0, 2]}) + Y_ohe = cp.array([[0., 0., 1., 0.], + [0., 1., 0., 1.]]) + + enc = OneHotEncoder(handle_unknown='ignore') + enc = enc.fit(X) + df = enc.inverse_transform(Y_ohe) + ref = DataFrame({'chars': [None, 'b'], 'int': [0, 2]}) + assert df.equals(ref) From 935436adf9d3edfb7ae564e4439f071ed0d3c305 Mon Sep 17 00:00:00 2001 From: Simon Andersen Date: Fri, 20 Mar 2020 13:20:54 -0500 Subject: [PATCH 060/330] Add random inputs tests for one hot encoder --- python/cuml/test/test_one_hot_encoder.py | 33 ++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git 
a/python/cuml/test/test_one_hot_encoder.py b/python/cuml/test/test_one_hot_encoder.py index 1fdc4d519e..bd49727873 100644 --- a/python/cuml/test/test_one_hot_encoder.py +++ b/python/cuml/test/test_one_hot_encoder.py @@ -16,9 +16,13 @@ from cuml.preprocessing import OneHotEncoder import cupy as cp +import pandas as pd +import numpy as np from sklearn.preprocessing import OneHotEncoder as SkOneHotEncoder +from cuml.test.utils import stress_param + def _from_df_to_array(df): return list(zip(*[df[feature] for feature in df.columns])) @@ -96,3 +100,32 @@ def test_onehot_inverse_transform_handle_unknown(): df = enc.inverse_transform(Y_ohe) ref = DataFrame({'chars': [None, 'b'], 'int': [0, 2]}) assert df.equals(ref) + + +def generate_inputs_from_categories(categories=None, + n_samples=10, seed=5060): + if categories is None: + categories = {'strings': ['Foo', 'Bar', 'Baz'], + 'integers': list(range(1000))} + + rd = np.random.RandomState(seed) + pandas_df = pd.DataFrame({name: rd.choice(cat, n_samples) + for name, cat in categories.items()}) + ary = _from_df_to_array(pandas_df) + df = DataFrame.from_pandas(pandas_df) + return df, ary + + +@pytest.mark.parametrize("n_samples", [10, 10000, stress_param(250000)]) +def test_onehot_random_inputs(n_samples): + df, ary = generate_inputs_from_categories(n_samples=n_samples) + + enc = OneHotEncoder(sparse=False) + sk_enc = SkOneHotEncoder(sparse=False) + ohe = enc.fit_transform(df) + ref = sk_enc.fit_transform(ary) + cp.testing.assert_array_equal(ohe, ref) + + inv_ohe = enc.inverse_transform(ohe) + + assert inv_ohe.equals(df) From 9a6feb6095b45c1f3e7ea120b72325931481964d Mon Sep 17 00:00:00 2001 From: Simon Andersen Date: Fri, 20 Mar 2020 13:25:35 -0500 Subject: [PATCH 061/330] Add support for custom output datatype for transform --- python/cuml/preprocessing/encoders.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/python/cuml/preprocessing/encoders.py b/python/cuml/preprocessing/encoders.py index 
5d0c041e6b..697e72a738 100644 --- a/python/cuml/preprocessing/encoders.py +++ b/python/cuml/preprocessing/encoders.py @@ -56,9 +56,8 @@ class OneHotEncoder: should be dropped. # sparse : bool, default=True # Will return sparse matrix if set True else will return an array. - TODO: Implement dtype dtype : number type, default=np.float - Desired dtype of output. + Desired datatype of transform's output. handle_unknown : {'error', 'ignore'}, default='error' Whether to raise an error or ignore if an unknown categorical feature is present during transform (default is to raise). When this parameter @@ -152,12 +151,11 @@ def fit_transform(self, X): """ return self.fit(X).transform(X) - @staticmethod @with_cupy_rmm - def _one_hot_encoding(encoder, X): + def _one_hot_encoding(self, encoder, X): col_idx = encoder.transform(X).to_gpu_array(fillna="pandas") col_idx = cp.asarray(col_idx) - ohe = cp.zeros((len(X), len(encoder.classes_))) + ohe = cp.zeros((len(X), len(encoder.classes_)), dtype=self.dtype) # Filter out rows with null values idx_to_keep = col_idx > -1 row_idx = cp.arange(len(ohe))[idx_to_keep] From de6e2ad06fe96ef5015b31017bc97daa59c92f73 Mon Sep 17 00:00:00 2001 From: Simon Andersen Date: Fri, 20 Mar 2020 20:06:06 -0500 Subject: [PATCH 062/330] Add support for drop index feature of OneHotEncoder --- python/cuml/preprocessing/encoders.py | 47 ++++++++++++++++++++++-- python/cuml/test/test_one_hot_encoder.py | 37 ++++++++++++++++++- 2 files changed, 79 insertions(+), 5 deletions(-) diff --git a/python/cuml/preprocessing/encoders.py b/python/cuml/preprocessing/encoders.py index 697e72a738..3492a134a7 100644 --- a/python/cuml/preprocessing/encoders.py +++ b/python/cuml/preprocessing/encoders.py @@ -43,7 +43,7 @@ class OneHotEncoder: - 'auto' : Determine categories automatically from the training data. - DataFrame : ``categories[col]`` holds the categories expected in the feature col. 
- TODO: Implement drop + TODO: Change documentation to reflect dict[Series] instead of DataFrame drop : 'first' or a cuml.DataFrame, default=None Specifies a methodology to use to drop one of the categories per feature. This is useful in situations where perfectly collinear @@ -102,6 +102,34 @@ def _check_is_fitted(self): if not self._fitted: raise RuntimeError("Model must first be .fit()") + def _compute_drop_idx(self): + if self.drop is None: + return None + elif isinstance(self.drop, str) and self.drop == 'first': + return {feature: cp.array(0) for feature in self._encoders.keys()} + elif not isinstance(self.drop, str): + if len(self.drop.keys()) != len(self._encoders): + msg = ("`drop` should have as many columns as the number " + "of features ({}), got {}") + raise ValueError(msg.format(len(self._encoders), + len(self.drop.keys()))) + drop_idx = dict() + for feature in self.drop.keys(): + cats = self._encoders[feature].classes_ + if not self.drop[feature].isin(cats).all(): + msg = ("Some categories for feature {} were supposed " + "to be dropped, but were not found in the encoder " + "categories.".format(feature)) + raise ValueError(msg) + cats = Series(cats.to_gpu_array()) + idx = cats.isin(self.drop[feature]) + drop_idx[feature] = cp.asarray(cats[idx].index) + return drop_idx + else: + msg = ("Wrong input for parameter `drop`. Expected " + "'first', None or a dataframe, got {}") + raise ValueError(msg.format(type(self.drop))) + def fit(self, X): """ Fit OneHotEncoder to X. 
@@ -131,7 +159,7 @@ def fit(self, X): " during fit".format(feature)) raise KeyError(msg) - # self.drop_idx_ = self._compute_drop_idx() + self.drop_idx_ = self._compute_drop_idx() self._fitted = True return self @@ -152,15 +180,25 @@ def fit_transform(self, X): return self.fit(X).transform(X) @with_cupy_rmm - def _one_hot_encoding(self, encoder, X): + def _one_hot_encoding(self, feature, X): + encoder = self._encoders[feature] + col_idx = encoder.transform(X).to_gpu_array(fillna="pandas") col_idx = cp.asarray(col_idx) + ohe = cp.zeros((len(X), len(encoder.classes_)), dtype=self.dtype) # Filter out rows with null values idx_to_keep = col_idx > -1 row_idx = cp.arange(len(ohe))[idx_to_keep] col_idx = col_idx[idx_to_keep] ohe[row_idx, col_idx] = 1 + + if self.drop_idx_ is not None: + drop_idx = self.drop_idx_[feature] + mask = cp.ones((ohe.shape[1]), dtype=cp.bool) + mask[drop_idx] = False + ohe = ohe[:, mask] + return ohe @with_cupy_rmm @@ -177,7 +215,7 @@ def transform(self, X): Transformed input. """ self._check_is_fitted() - onehots = [self._one_hot_encoding(self._encoders[feature], X[feature]) + onehots = [self._one_hot_encoding(feature, X[feature]) for feature in X.columns] return cp.concatenate(onehots, axis=1) @@ -196,6 +234,7 @@ def inverse_transform(self, X): X_tr : cudf.DataFrame Inverse transformed array. """ + # TODO: drop_idx for inverse transform self._check_is_fitted() result = DataFrame(columns=self._encoders.keys()) j = 0 diff --git a/python/cuml/test/test_one_hot_encoder.py b/python/cuml/test/test_one_hot_encoder.py index bd49727873..a5db6b5571 100644 --- a/python/cuml/test/test_one_hot_encoder.py +++ b/python/cuml/test/test_one_hot_encoder.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
import pytest -from cudf import DataFrame +from cudf import DataFrame, Series from cuml.preprocessing import OneHotEncoder import cupy as cp @@ -129,3 +129,38 @@ def test_onehot_random_inputs(n_samples): inv_ohe = enc.inverse_transform(ohe) assert inv_ohe.equals(df) + + +def test_onehot_drop_idx_first(): + X_ary = [['c', 2, 'a'], + ['b', 2, 'b']] + X = DataFrame({'chars': ['c', 'b'], 'int': [2, 2], 'letters': ['a', 'b']}) + + enc = OneHotEncoder(sparse=False, drop='first') + sk_enc = SkOneHotEncoder(sparse=False, drop='first') + ohe = enc.fit_transform(X) + ref = sk_enc.fit_transform(X_ary) + cp.testing.assert_array_equal(ohe, ref) + + +def test_onehot_drop_idx_series(): + X = DataFrame({'chars': ['c', 'b'], 'int': [2, 2], 'letters': ['a', 'b']}) + drop = dict({'chars': Series(['b']), + 'int': Series([]), + 'letters': Series(['a', 'b'])}) + enc = OneHotEncoder(sparse=False, drop=drop) + ohe = enc.fit_transform(X) + ref = cp.array([[1., 1.], + [0., 1.]]) + cp.testing.assert_array_equal(ohe, ref) + + +def test_onehot_drop_idx(): + X = DataFrame({'chars': ['c', 'b'], 'int': [2, 2], 'letters': ['a', 'b']}) + drop = dict({'chars': Series('b'), + 'int': Series([2]), + 'letters': Series('b')}) + enc = OneHotEncoder(sparse=False, drop=drop) + ohe = enc.fit_transform(X) + ref = SkOneHotEncoder(sparse=False, drop=['b', 2, 'b']).fit_transform(X) + cp.testing.assert_array_equal(ohe, ref) From 4c49bf2ce9b63ff397863dc393cbe0df4ebe6725 Mon Sep 17 00:00:00 2001 From: Simon Andersen Date: Mon, 23 Mar 2020 10:40:57 -0500 Subject: [PATCH 063/330] Add tests for drop exceptions in OneHotEncoder --- python/cuml/preprocessing/encoders.py | 7 ++-- python/cuml/test/test_one_hot_encoder.py | 46 +++++++++++++++--------- 2 files changed, 33 insertions(+), 20 deletions(-) diff --git a/python/cuml/preprocessing/encoders.py b/python/cuml/preprocessing/encoders.py index 3492a134a7..a7cc091f7e 100644 --- a/python/cuml/preprocessing/encoders.py +++ b/python/cuml/preprocessing/encoders.py @@ -43,7 
+43,6 @@ class OneHotEncoder: - 'auto' : Determine categories automatically from the training data. - DataFrame : ``categories[col]`` holds the categories expected in the feature col. - TODO: Change documentation to reflect dict[Series] instead of DataFrame drop : 'first' or a cuml.DataFrame, default=None Specifies a methodology to use to drop one of the categories per feature. This is useful in situations where perfectly collinear @@ -52,7 +51,7 @@ class OneHotEncoder: - None : retain all features (the default). - 'first' : drop the first category in each feature. If only one category is present, the feature will be dropped entirely. - - DataFrame : ``drop[col]`` is the category in feature col that + - Dict[Series] : ``drop[col]`` are the categories in feature col that should be dropped. # sparse : bool, default=True # Will return sparse matrix if set True else will return an array. @@ -107,7 +106,7 @@ def _compute_drop_idx(self): return None elif isinstance(self.drop, str) and self.drop == 'first': return {feature: cp.array(0) for feature in self._encoders.keys()} - elif not isinstance(self.drop, str): + elif isinstance(self.drop, dict): if len(self.drop.keys()) != len(self._encoders): msg = ("`drop` should have as many columns as the number " "of features ({}), got {}") @@ -127,7 +126,7 @@ def _compute_drop_idx(self): return drop_idx else: msg = ("Wrong input for parameter `drop`. 
Expected " - "'first', None or a dataframe, got {}") + "'first', None or a dict, got {}") raise ValueError(msg.format(type(self.drop))) def fit(self, X): diff --git a/python/cuml/test/test_one_hot_encoder.py b/python/cuml/test/test_one_hot_encoder.py index a5db6b5571..d432187379 100644 --- a/python/cuml/test/test_one_hot_encoder.py +++ b/python/cuml/test/test_one_hot_encoder.py @@ -28,6 +28,20 @@ def _from_df_to_array(df): return list(zip(*[df[feature] for feature in df.columns])) +def _generate_inputs_from_categories(categories=None, + n_samples=10, seed=5060): + if categories is None: + categories = {'strings': ['Foo', 'Bar', 'Baz'], + 'integers': list(range(1000))} + + rd = np.random.RandomState(seed) + pandas_df = pd.DataFrame({name: rd.choice(cat, n_samples) + for name, cat in categories.items()}) + ary = _from_df_to_array(pandas_df) + df = DataFrame.from_pandas(pandas_df) + return df, ary + + def test_onehot_vs_skonehot(): X = DataFrame({'gender': ['Male', 'Female', 'Female'], 'int': [1, 3, 2]}) skX = _from_df_to_array(X) @@ -102,23 +116,9 @@ def test_onehot_inverse_transform_handle_unknown(): assert df.equals(ref) -def generate_inputs_from_categories(categories=None, - n_samples=10, seed=5060): - if categories is None: - categories = {'strings': ['Foo', 'Bar', 'Baz'], - 'integers': list(range(1000))} - - rd = np.random.RandomState(seed) - pandas_df = pd.DataFrame({name: rd.choice(cat, n_samples) - for name, cat in categories.items()}) - ary = _from_df_to_array(pandas_df) - df = DataFrame.from_pandas(pandas_df) - return df, ary - - @pytest.mark.parametrize("n_samples", [10, 10000, stress_param(250000)]) def test_onehot_random_inputs(n_samples): - df, ary = generate_inputs_from_categories(n_samples=n_samples) + df, ary = _generate_inputs_from_categories(n_samples=n_samples) enc = OneHotEncoder(sparse=False) sk_enc = SkOneHotEncoder(sparse=False) @@ -155,7 +155,7 @@ def test_onehot_drop_idx_series(): cp.testing.assert_array_equal(ohe, ref) -def 
test_onehot_drop_idx(): +def test_onehot_drop_one_of_each(): X = DataFrame({'chars': ['c', 'b'], 'int': [2, 2], 'letters': ['a', 'b']}) drop = dict({'chars': Series('b'), 'int': Series([2]), @@ -164,3 +164,17 @@ def test_onehot_drop_idx(): ohe = enc.fit_transform(X) ref = SkOneHotEncoder(sparse=False, drop=['b', 2, 'b']).fit_transform(X) cp.testing.assert_array_equal(ohe, ref) + + +@pytest.mark.parametrize("drop, pattern", + [[dict({'chars': Series('b')}), + '`drop` should have as many columns'], + [dict({'chars': Series('b'), 'int': Series(3)}), + 'Some categories [a-zA-Z, ]* were not found'], + [DataFrame({'chars': Series('b'), 'int': Series(3)}), + 'Wrong input for parameter `drop`.']]) +def test_onehot_drop_exceptions(drop, pattern): + X = DataFrame({'chars': ['c', 'b'], 'int': [2, 2]}) + + with pytest.raises(ValueError, match=pattern): + OneHotEncoder(sparse=False, drop=drop).fit(X) From 0024e9de4db9e096bba8e335033aab8b31db1100 Mon Sep 17 00:00:00 2001 From: Simon Andersen Date: Mon, 23 Mar 2020 18:53:31 -0500 Subject: [PATCH 064/330] Fix drop index feature for inverse_transform --- python/cuml/preprocessing/encoders.py | 45 +++++++++++++++++++----- python/cuml/test/test_one_hot_encoder.py | 35 +++++++----------- 2 files changed, 50 insertions(+), 30 deletions(-) diff --git a/python/cuml/preprocessing/encoders.py b/python/cuml/preprocessing/encoders.py index a7cc091f7e..1f445e500a 100644 --- a/python/cuml/preprocessing/encoders.py +++ b/python/cuml/preprocessing/encoders.py @@ -51,7 +51,7 @@ class OneHotEncoder: - None : retain all features (the default). - 'first' : drop the first category in each feature. If only one category is present, the feature will be dropped entirely. - - Dict[Series] : ``drop[col]`` are the categories in feature col that + - Dict : ``drop[col]`` is the category in feature col that should be dropped. # sparse : bool, default=True # Will return sparse matrix if set True else will return an array. 
@@ -105,7 +105,8 @@ def _compute_drop_idx(self): if self.drop is None: return None elif isinstance(self.drop, str) and self.drop == 'first': - return {feature: cp.array(0) for feature in self._encoders.keys()} + return {feature: cp.array([0]) + for feature in self._encoders.keys()} elif isinstance(self.drop, dict): if len(self.drop.keys()) != len(self._encoders): msg = ("`drop` should have as many columns as the number " @@ -115,12 +116,21 @@ def _compute_drop_idx(self): drop_idx = dict() for feature in self.drop.keys(): cats = self._encoders[feature].classes_ + self.drop[feature] = Series(self.drop[feature]) + if len(self.drop[feature]) != 1: + msg = ("Trying to drop multiple values for feature {}, " + "this is not supported.").format(feature) + # Dropping multiple values actually works except in inverse + # transform where there is no way to know which categories + # were present before one hot encoding if multiple + # categories were dropped. + raise ValueError(msg) if not self.drop[feature].isin(cats).all(): msg = ("Some categories for feature {} were supposed " "to be dropped, but were not found in the encoder " "categories.".format(feature)) raise ValueError(msg) - cats = Series(cats.to_gpu_array()) + cats = Series(cats) idx = cats.isin(self.drop[feature]) drop_idx[feature] = cp.asarray(cats[idx].index) return drop_idx @@ -233,17 +243,36 @@ def inverse_transform(self, X): X_tr : cudf.DataFrame Inverse transformed array. 
""" - # TODO: drop_idx for inverse transform self._check_is_fitted() result = DataFrame(columns=self._encoders.keys()) j = 0 for feature in self._encoders.keys(): - enc_size = len(self._encoders[feature].classes_) + feature_enc = self._encoders[feature] + cats = feature_enc.classes_ + + if self.drop is not None: + # Remove dropped categories + dropped_class_idx = Series(self.drop_idx_[feature]) + dropped_class_mask = Series(cats).isin(cats[dropped_class_idx]) + cats = cats[~dropped_class_mask] + + enc_size = len(cats) x_feature = X[:, j:j + enc_size] - not_null_idx = x_feature.any(axis=1) idx = cp.argmax(x_feature, axis=1) - inv = self._encoders[feature].inverse_transform(Series(idx)) - inv.iloc[~not_null_idx] = None + inv = Series(cats[idx]) + + if self.handle_unknown == 'ignore': + not_null_idx = x_feature.any(axis=1) + inv.iloc[~not_null_idx] = None + elif self.drop is not None: + # drop will either be None or handle_unknown will be error. If + # self.drop is not None, then we can safely assume that all of + # the nulls in each column are the dropped value + dropped_mask = cp.asarray(x_feature.sum(axis=1) == 0).flatten() + if dropped_mask.any(): + inv[dropped_mask] = feature_enc.inverse_transform( + Series(self.drop_idx_[feature]))[0] + result[feature] = inv j += enc_size return result diff --git a/python/cuml/test/test_one_hot_encoder.py b/python/cuml/test/test_one_hot_encoder.py index d432187379..d0fcaa2b15 100644 --- a/python/cuml/test/test_one_hot_encoder.py +++ b/python/cuml/test/test_one_hot_encoder.py @@ -55,10 +55,13 @@ def test_onehot_vs_skonehot(): cp.testing.assert_array_equal(ohe, ref) -def test_onehot_inverse_transform(): - X = DataFrame({'gender': ['Male', 'Female', 'Female'], 'int': [1, 3, 2]}) +@pytest.mark.parametrize('drop', [None, + 'first', + {'g': Series('F'), 'i': Series(3)}]) +def test_onehot_inverse_transform(drop): + X = DataFrame({'g': ['M', 'F', 'F'], 'i': [1, 3, 2]}) - enc = OneHotEncoder() + enc = OneHotEncoder(drop=drop) ohe = 
enc.fit_transform(X) inv = enc.inverse_transform(ohe) @@ -143,23 +146,9 @@ def test_onehot_drop_idx_first(): cp.testing.assert_array_equal(ohe, ref) -def test_onehot_drop_idx_series(): - X = DataFrame({'chars': ['c', 'b'], 'int': [2, 2], 'letters': ['a', 'b']}) - drop = dict({'chars': Series(['b']), - 'int': Series([]), - 'letters': Series(['a', 'b'])}) - enc = OneHotEncoder(sparse=False, drop=drop) - ohe = enc.fit_transform(X) - ref = cp.array([[1., 1.], - [0., 1.]]) - cp.testing.assert_array_equal(ohe, ref) - - def test_onehot_drop_one_of_each(): X = DataFrame({'chars': ['c', 'b'], 'int': [2, 2], 'letters': ['a', 'b']}) - drop = dict({'chars': Series('b'), - 'int': Series([2]), - 'letters': Series('b')}) + drop = dict({'chars': 'b', 'int': 2, 'letters': 'b'}) enc = OneHotEncoder(sparse=False, drop=drop) ohe = enc.fit_transform(X) ref = SkOneHotEncoder(sparse=False, drop=['b', 2, 'b']).fit_transform(X) @@ -167,14 +156,16 @@ def test_onehot_drop_one_of_each(): @pytest.mark.parametrize("drop, pattern", - [[dict({'chars': Series('b')}), + [[dict({'chars': 'b'}), '`drop` should have as many columns'], - [dict({'chars': Series('b'), 'int': Series(3)}), + [dict({'chars': 'b', 'int': [2, 0]}), + 'Trying to drop multiple values'], + [dict({'chars': 'b', 'int': 3}), 'Some categories [a-zA-Z, ]* were not found'], - [DataFrame({'chars': Series('b'), 'int': Series(3)}), + [DataFrame({'chars': 'b', 'int': 3}), 'Wrong input for parameter `drop`.']]) def test_onehot_drop_exceptions(drop, pattern): - X = DataFrame({'chars': ['c', 'b'], 'int': [2, 2]}) + X = DataFrame({'chars': ['c', 'b', 'd'], 'int': [2, 1, 0]}) with pytest.raises(ValueError, match=pattern): OneHotEncoder(sparse=False, drop=drop).fit(X) From 3a6a772d561c0a7297e5fb82fae839162041bcc1 Mon Sep 17 00:00:00 2001 From: Simon Andersen Date: Tue, 24 Mar 2020 12:28:46 -0500 Subject: [PATCH 065/330] Add sparse feature for OneHotEncoder --- python/cuml/preprocessing/encoders.py | 16 ++++++++++++---- 
python/cuml/test/test_one_hot_encoder.py | 13 ++++++++----- 2 files changed, 20 insertions(+), 9 deletions(-) diff --git a/python/cuml/preprocessing/encoders.py b/python/cuml/preprocessing/encoders.py index 1f445e500a..92997d28e6 100644 --- a/python/cuml/preprocessing/encoders.py +++ b/python/cuml/preprocessing/encoders.py @@ -53,8 +53,8 @@ class OneHotEncoder: category is present, the feature will be dropped entirely. - Dict : ``drop[col]`` is the category in feature col that should be dropped. - # sparse : bool, default=True - # Will return sparse matrix if set True else will return an array. + sparse : bool, default=True + Transform will return sparse matrix if True else will return an array. dtype : number type, default=np.float Desired datatype of transform's output. handle_unknown : {'error', 'ignore'}, default='error' @@ -73,7 +73,7 @@ class OneHotEncoder: be retained. """ def __init__(self, categories='auto', drop=None, sparse=True, - dtype=np.float64, handle_unknown='error'): + dtype=np.float, handle_unknown='error'): self.categories = categories self.sparse = sparse self.dtype = dtype @@ -82,6 +82,9 @@ def __init__(self, categories='auto', drop=None, sparse=True, self._fitted = False self.drop_idx_ = None self._encoders = None + if sparse and np.dtype(dtype) not in ['f', 'd', 'F', 'D']: + raise ValueError('Only float32, float64, complex64 and complex128 ' + 'are supported when using sparse') def _validate_keywords(self): if self.handle_unknown not in ('error', 'ignore'): @@ -226,7 +229,10 @@ def transform(self, X): self._check_is_fitted() onehots = [self._one_hot_encoding(feature, X[feature]) for feature in X.columns] - return cp.concatenate(onehots, axis=1) + onehots = cp.concatenate(onehots, axis=1) + if self.sparse: + onehots = cp.sparse.csr_matrix(onehots) + return onehots @with_cupy_rmm def inverse_transform(self, X): @@ -244,6 +250,8 @@ def inverse_transform(self, X): Inverse transformed array. 
""" self._check_is_fitted() + if cp.sparse.issparse(X): + X = X.toarray() result = DataFrame(columns=self._encoders.keys()) j = 0 for feature in self._encoders.keys(): diff --git a/python/cuml/test/test_one_hot_encoder.py b/python/cuml/test/test_one_hot_encoder.py index d0fcaa2b15..be2720acab 100644 --- a/python/cuml/test/test_one_hot_encoder.py +++ b/python/cuml/test/test_one_hot_encoder.py @@ -58,10 +58,11 @@ def test_onehot_vs_skonehot(): @pytest.mark.parametrize('drop', [None, 'first', {'g': Series('F'), 'i': Series(3)}]) -def test_onehot_inverse_transform(drop): +@pytest.mark.parametrize('sparse', [True, False]) +def test_onehot_inverse_transform(drop, sparse): X = DataFrame({'g': ['M', 'F', 'F'], 'i': [1, 3, 2]}) - enc = OneHotEncoder(drop=drop) + enc = OneHotEncoder(drop=drop, sparse=sparse) ohe = enc.fit_transform(X) inv = enc.inverse_transform(ohe) @@ -71,7 +72,9 @@ def test_onehot_inverse_transform(drop): def test_onehot_categories(): X = DataFrame({'chars': ['a', 'b'], 'int': [0, 2]}) enc = OneHotEncoder( - categories=DataFrame({'chars': ['a', 'b', 'c'], 'int': [0, 1, 2]})) + categories=DataFrame({'chars': ['a', 'b', 'c'], 'int': [0, 1, 2]}), + sparse=False + ) ref = cp.array([[1., 0., 0., 1., 0., 0.], [0., 1., 0., 0., 0., 1.]]) res = enc.fit_transform(X) @@ -94,12 +97,12 @@ def test_onehot_transform_handle_unknown(): X = DataFrame({'chars': ['a', 'b'], 'int': [0, 2]}) Y = DataFrame({'chars': ['c', 'b'], 'int': [0, 2]}) - enc = OneHotEncoder(handle_unknown='error') + enc = OneHotEncoder(handle_unknown='error', sparse=False) enc = enc.fit(X) with pytest.raises(KeyError): enc.transform(Y) - enc = OneHotEncoder(handle_unknown='ignore') + enc = OneHotEncoder(handle_unknown='ignore', sparse=False) enc = enc.fit(X) ohe = enc.transform(Y) ref = cp.array([[0., 0., 1., 0.], From 3f939bcfdba62464a6a431a580755f7b2fb6c075 Mon Sep 17 00:00:00 2001 From: Simon Andersen Date: Tue, 24 Mar 2020 14:54:20 -0500 Subject: [PATCH 066/330] Fix inverse_transform when number of 
categories is 1 and dropping first --- python/cuml/preprocessing/encoders.py | 5 +++++ python/cuml/test/test_one_hot_encoder.py | 2 ++ 2 files changed, 7 insertions(+) diff --git a/python/cuml/preprocessing/encoders.py b/python/cuml/preprocessing/encoders.py index 92997d28e6..264e6f48c4 100644 --- a/python/cuml/preprocessing/encoders.py +++ b/python/cuml/preprocessing/encoders.py @@ -17,6 +17,7 @@ from cuml.preprocessing import LabelEncoder from cudf import DataFrame, Series +from cudf.core import GenericIndex from cuml.utils import with_cupy_rmm @@ -262,6 +263,10 @@ def inverse_transform(self, X): # Remove dropped categories dropped_class_idx = Series(self.drop_idx_[feature]) dropped_class_mask = Series(cats).isin(cats[dropped_class_idx]) + if len(cats) == 1: + inv = Series(GenericIndex(cats[0]).repeat(X.shape[0])) + result[feature] = inv + continue cats = cats[~dropped_class_mask] enc_size = len(cats) diff --git a/python/cuml/test/test_one_hot_encoder.py b/python/cuml/test/test_one_hot_encoder.py index be2720acab..a4c7872bb0 100644 --- a/python/cuml/test/test_one_hot_encoder.py +++ b/python/cuml/test/test_one_hot_encoder.py @@ -147,6 +147,7 @@ def test_onehot_drop_idx_first(): ohe = enc.fit_transform(X) ref = sk_enc.fit_transform(X_ary) cp.testing.assert_array_equal(ohe, ref) + assert X.equals(enc.inverse_transform(ohe)) def test_onehot_drop_one_of_each(): @@ -156,6 +157,7 @@ def test_onehot_drop_one_of_each(): ohe = enc.fit_transform(X) ref = SkOneHotEncoder(sparse=False, drop=['b', 2, 'b']).fit_transform(X) cp.testing.assert_array_equal(ohe, ref) + assert X.equals(enc.inverse_transform(ohe)) @pytest.mark.parametrize("drop, pattern", From 35973abdf5b8cf1c0676a0dc2a8f222867d30ecd Mon Sep 17 00:00:00 2001 From: Simon Andersen Date: Tue, 24 Mar 2020 17:45:39 -0500 Subject: [PATCH 067/330] Deactivated sparse feature because of incorrectness on large arrays --- python/cuml/preprocessing/encoders.py | 12 +++++++++--- python/cuml/test/test_one_hot_encoder.py | 23 
+++++++++++++++-------- 2 files changed, 24 insertions(+), 11 deletions(-) diff --git a/python/cuml/preprocessing/encoders.py b/python/cuml/preprocessing/encoders.py index 264e6f48c4..00d640db1d 100644 --- a/python/cuml/preprocessing/encoders.py +++ b/python/cuml/preprocessing/encoders.py @@ -54,8 +54,11 @@ class OneHotEncoder: category is present, the feature will be dropped entirely. - Dict : ``drop[col]`` is the category in feature col that should be dropped. - sparse : bool, default=True - Transform will return sparse matrix if True else will return an array. + sparse : bool, default=False + This feature was deactivated and will give an exception when True. + The reason is that sparse matrices are not fully supported by cupy + yet, causing incorrect values when computing one hot encodings. + See https://github.com/cupy/cupy/issues/3223 dtype : number type, default=np.float Desired datatype of transform's output. handle_unknown : {'error', 'ignore'}, default='error' @@ -73,7 +76,7 @@ class OneHotEncoder: be dropped for each feature. None if all the transformed features will be retained. 
""" - def __init__(self, categories='auto', drop=None, sparse=True, + def __init__(self, categories='auto', drop=None, sparse=False, dtype=np.float, handle_unknown='error'): self.categories = categories self.sparse = sparse @@ -83,6 +86,9 @@ def __init__(self, categories='auto', drop=None, sparse=True, self._fitted = False self.drop_idx_ = None self._encoders = None + if sparse: + raise ValueError('Sparse matrix are not fully supported by cupy ' + 'yet, causing incorrect values') if sparse and np.dtype(dtype) not in ['f', 'd', 'F', 'D']: raise ValueError('Only float32, float64, complex64 and complex128 ' 'are supported when using sparse') diff --git a/python/cuml/test/test_one_hot_encoder.py b/python/cuml/test/test_one_hot_encoder.py index a4c7872bb0..906fe2ef95 100644 --- a/python/cuml/test/test_one_hot_encoder.py +++ b/python/cuml/test/test_one_hot_encoder.py @@ -58,11 +58,10 @@ def test_onehot_vs_skonehot(): @pytest.mark.parametrize('drop', [None, 'first', {'g': Series('F'), 'i': Series(3)}]) -@pytest.mark.parametrize('sparse', [True, False]) -def test_onehot_inverse_transform(drop, sparse): +def test_onehot_inverse_transform(drop): X = DataFrame({'g': ['M', 'F', 'F'], 'i': [1, 3, 2]}) - enc = OneHotEncoder(drop=drop, sparse=sparse) + enc = OneHotEncoder(drop=drop) ohe = enc.fit_transform(X) inv = enc.inverse_transform(ohe) @@ -122,15 +121,23 @@ def test_onehot_inverse_transform_handle_unknown(): assert df.equals(ref) -@pytest.mark.parametrize("n_samples", [10, 10000, stress_param(250000)]) -def test_onehot_random_inputs(n_samples): +@pytest.mark.parametrize('drop', [None, 'first']) +@pytest.mark.parametrize('sparse', [True, False]) +@pytest.mark.parametrize("n_samples", [10, 10000, 50000, stress_param(250000)]) +def test_onehot_random_inputs(drop, sparse, n_samples): + if sparse: + pytest.xfail("Sparse arrays are not fully supported by cupy.") + df, ary = _generate_inputs_from_categories(n_samples=n_samples) - enc = OneHotEncoder(sparse=False) - sk_enc = 
SkOneHotEncoder(sparse=False) + enc = OneHotEncoder(sparse=sparse, drop=drop) + sk_enc = SkOneHotEncoder(sparse=sparse, drop=drop) ohe = enc.fit_transform(df) ref = sk_enc.fit_transform(ary) - cp.testing.assert_array_equal(ohe, ref) + if sparse: + cp.testing.assert_array_equal(ohe.toarray(), ref.toarray()) + else: + cp.testing.assert_array_equal(ohe, ref) inv_ohe = enc.inverse_transform(ohe) From 73c963c6f237f9163ad93a37a432720513066914 Mon Sep 17 00:00:00 2001 From: Simon Andersen Date: Wed, 25 Mar 2020 11:13:58 -0500 Subject: [PATCH 068/330] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d076372281..3db24be141 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ ## New Features - PR #1777: Python bindings for entropy - PR #1742: Mean squared error implementation with cupy -- PR #1817: Confusion matrix implementation with cupy +- PR #1817: Confusion matrix implementation with cupy (SNSG and MNMG) - PR #1766: Mean absolute error implementation with cupy - PR #1766: Mean squared log error implementation with cupy - PR #1635: cuML Array shim and configurable output added to cluster methods From 11d6d0a95ebf70c9bcbbc4f785aa1303bbd23d0e Mon Sep 17 00:00:00 2001 From: Levs Dolgovs Date: Thu, 26 Mar 2020 15:42:58 -0700 Subject: [PATCH 069/330] addressed review comments --- cpp/include/cuml/fil/fil.h | 4 +-- cpp/src/fil/fil.cu | 13 ++++---- cpp/src/fil/infer.cu | 66 +++++++++++++++++++++++++++----------- 3 files changed, 56 insertions(+), 27 deletions(-) diff --git a/cpp/include/cuml/fil/fil.h b/cpp/include/cuml/fil/fil.h index 3ea63d8a52..94985e1e35 100644 --- a/cpp/include/cuml/fil/fil.h +++ b/cpp/include/cuml/fil/fil.h @@ -122,7 +122,7 @@ enum leaf_value_t { // to be extended }; -template +template struct leaf_output_t {}; template <> struct leaf_output_t { @@ -172,7 +172,7 @@ struct forest_params_t { algo_t algo; // output is the desired output type output_t output; - // 
threshold is used to for classification if (output & OUTPUT_CLASS), + // threshold is used to for classification if ((output & OUTPUT_CLASS) != 0), // and is ignored otherwise float threshold; // global_bias is added to the sum of tree predictions diff --git a/cpp/src/fil/fil.cu b/cpp/src/fil/fil.cu index 4e67e95f45..e8a547e31f 100644 --- a/cpp/src/fil/fil.cu +++ b/cpp/src/fil/fil.cu @@ -72,13 +72,13 @@ __host__ __device__ float sigmoid(float x) { return 1.0f / (1.0f + expf(-x)); } (preds) of size n; the transformations are defined by output, and include averaging (multiplying by inv_num_trees), adding global_bias (always done), sigmoid and applying threshold. in case of complement_proba, - fills in the converse probability */ + fills in the complement probability */ __global__ void transform_k(float* preds, size_t n, output_t output, float inv_num_trees, float threshold, float global_bias, bool complement_proba) { size_t i = threadIdx.x + size_t(blockIdx.x) * blockDim.x; if (i >= n) return; - if (complement_proba && i % 2) return; + if (complement_proba && (i % 2) != 0) return; float result = preds[i]; if ((output & output_t::AVG) != 0) result *= inv_num_trees; @@ -155,10 +155,11 @@ struct forest { bool complement_proba = predict_proba && leaf_payload_type_ == FLOAT_SCALAR; bool do_transform = ot != output_t::RAW || global_bias_ != 0.0f || complement_proba; - if (leaf_payload_type_ == INT_CLASS_LABEL && !predict_proba) + if (leaf_payload_type_ == INT_CLASS_LABEL && !predict_proba) { // moot since choosing best class and all transforms are monotonic // also, would break current code do_transform = false; + } if (do_transform) { size_t num_values_to_transform = @@ -305,8 +306,7 @@ void check_params(const forest_params_t* params, bool dense) { an ignored variable */ break; case leaf_value_t::INT_CLASS_LABEL: - ASSERT(params->num_classes != 1, "trees will always predict class 0"); - ASSERT(params->num_classes > 0, + ASSERT(params->num_classes >= 2, "num_classes is not 
ignored for " "leaf_payload_type == INT_CLASS_LABEL"); break; @@ -406,7 +406,7 @@ If the vector contains a NAN, asserts false */ int find_class_label_from_one_hot(tl::tl_float* vector, int len) { bool found_label = false; int out; - for (int i = 0; i < len; ++i) + for (int i = 0; i < len; ++i) { if (vector[i] == 1.0f) { ASSERT(!found_label, "label vector contains multiple 1.0f"); out = i; @@ -414,6 +414,7 @@ int find_class_label_from_one_hot(tl::tl_float* vector, int len) { } else ASSERT(vector[i] == 0.0f, "label vector contains values other than 0.0 and 1.0"); + } ASSERT(found_label, "did not find 1.0f in vector"); return out; } diff --git a/cpp/src/fil/infer.cu b/cpp/src/fil/infer.cu index 2c7935eb47..834a7ae377 100644 --- a/cpp/src/fil/infer.cu +++ b/cpp/src/fil/infer.cu @@ -66,11 +66,12 @@ __device__ __forceinline__ vec infer_one_tree( } while (mask != 0); vec out; #pragma unroll - for (int j = 0; j < NITEMS; ++j) + for (int j = 0; j < NITEMS; ++j) { /** dependent names are not considered templates by default, unless it's a member of a current [template] instantiation. alternatively, could have used .base_node::output<... */ out[j] = tree[curr[j]].template output(); + } return out; } @@ -91,36 +92,57 @@ __device__ __forceinline__ vec<1, output_type> infer_one_tree(tree_type tree, return out; } +// the device template should achieve the best performance, using up-to-date +// CUB defaults +#define BlockReduceDevice typename cub::BlockReduce, FIL_TPB> +/** +The shared memory requirements for finalization stage may differ based +on the set of PTX architectures the kernels were compiled for, as well as +the CUDA compute capability of the device chosen for computation. + +TODO: run a test kernel during forest init to determine the compute capability +chosen for the inference, for an accurate sizeof(BlockReduce::TempStorage), +which is used in determining max NITEMS or max input data columns. 
+ +600 is the __CUDA_ARCH__ for Pascal (6.0) GPUs, which is not defined in +host code. +6.0 is the earliest compute capability supported by FIL and RAPIDS in general. +See https://rapids.ai/start.html as well as cmake defaults. +*/ +// values below are defaults as of this change. +template +struct BlockReduceHost { + typedef typename cub::BlockReduce, FIL_TPB, + cub::BLOCK_REDUCE_WARP_REDUCTIONS, 1, 1, + 600>::TempStorage TempStorage; +}; + template // = FLOAT_SCALAR struct tree_aggregator_t { - /** To compute accurately, would need to know the latest __CUDA_ARCH__ - for which the code is compiled and which fits the SM being run on. - This is an approximation */ - static const int ptx_arch = 750; - typedef cub::BlockReduce, FIL_TPB, - cub::BLOCK_REDUCE_WARP_REDUCTIONS, 1, 1, ptx_arch> - BlockReduce; - typedef typename BlockReduce::TempStorage TempStorage; - vec acc; - TempStorage* tmp_storage; + void* tmp_storage; + + static size_t smem_finalize_footprint(int) { + return sizeof(typename BlockReduceHost::TempStorage); + } - static size_t smem_finalize_footprint(int) { return sizeof(TempStorage); } static size_t smem_accumulate_footprint(int) { return 0; } __device__ __forceinline__ tree_aggregator_t(int, void* shared_workspace, size_t) - : tmp_storage((TempStorage*)shared_workspace) {} + : tmp_storage(shared_workspace) {} + __device__ __forceinline__ void accumulate( vec single_tree_prediction) { acc += single_tree_prediction; } + __device__ __forceinline__ void finalize(float* out, int num_rows, int output_stride) { __syncthreads(); - new (tmp_storage) TempStorage; - acc = BlockReduce(*tmp_storage).Sum(acc); + acc = + BlockReduceDevice(*(BlockReduceDevice::TempStorage*)tmp_storage).Sum(acc); if (threadIdx.x == 0) { for (int i = 0; i < NITEMS; ++i) { int row = blockIdx.x * NITEMS + i; @@ -130,6 +152,8 @@ struct tree_aggregator_t { } }; +#undef BlockReduce_ + template struct tree_aggregator_t { // could switch to unsigned short to save shared memory @@ -183,11 
+207,12 @@ struct tree_aggregator_t { if (item < NITEMS && row < num_rows) { int max_votes = 0; int best_class = 0; - for (int c = 0; c < num_classes; ++c) + for (int c = 0; c < num_classes; ++c) { if (votes[c * NITEMS + item] > max_votes) { max_votes = votes[c * NITEMS + item]; best_class = c; } + } out[row] = best_class; } } @@ -277,12 +302,15 @@ void infer_k_launcher(storage_type forest, predict_params params, } } if (num_items == 0) { - int real_num_cols = params.num_cols; + int given_num_cols = params.num_cols; + // starting with maximum that might fit in shared memory, in case + // given_num_cols is a random large int + params.num_cols = params.max_shm / sizeof(float); // since we're crashing, this will not take too long while (get_smem_footprint<1, leaf_payload_type>(params) > params.max_shm) --params.num_cols; ASSERT(false, "p.num_cols == %d: too many features, only %d allowed%s", - real_num_cols, params.num_cols, + given_num_cols, params.num_cols, leaf_payload_type == INT_CLASS_LABEL ? 
" (accounting for shared class vote histogram)" : ""); @@ -321,7 +349,7 @@ void infer(storage_type forest, predict_params params, cudaStream_t stream) { infer_k_launcher(forest, params, stream); break; default: - ASSERT(false, "unknown leaf_payload_type"); + ASSERT(false, "internal error: invalid leaf_payload_type"); } } From 275c0508940794404cf4241f15eb1f239a27896a Mon Sep 17 00:00:00 2001 From: Simon Andersen Date: Mon, 30 Mar 2020 15:23:16 -0500 Subject: [PATCH 070/330] Fix documentation and exception message according to review --- python/cuml/preprocessing/encoders.py | 15 +++++++++------ python/cuml/test/test_one_hot_encoder.py | 2 +- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/python/cuml/preprocessing/encoders.py b/python/cuml/preprocessing/encoders.py index 00d640db1d..28c45eb393 100644 --- a/python/cuml/preprocessing/encoders.py +++ b/python/cuml/preprocessing/encoders.py @@ -14,6 +14,7 @@ # import numpy as np import cupy as cp +from sklearn.exceptions import NotFittedError from cuml.preprocessing import LabelEncoder from cudf import DataFrame, Series @@ -39,12 +40,12 @@ class OneHotEncoder: Parameters ---------- - categories : 'auto' or a cuml.DataFrame, default='auto' + categories : 'auto' or a cudf.DataFrame, default='auto' Categories (unique values) per feature: - 'auto' : Determine categories automatically from the training data. - DataFrame : ``categories[col]`` holds the categories expected in the feature col. - drop : 'first' or a cuml.DataFrame, default=None + drop : 'first' or a cudf.DataFrame, default=None Specifies a methodology to use to drop one of the categories per feature. This is useful in situations where perfectly collinear features cause problems, such as when feeding the resulting data @@ -109,7 +110,9 @@ def _validate_keywords(self): def _check_is_fitted(self): if not self._fitted: - raise RuntimeError("Model must first be .fit()") + msg = ("This OneHotEncoder instance is not fitted yet. 
Call 'fit' " + "with appropriate arguments before using this estimator.") + raise NotFittedError(msg) def _compute_drop_idx(self): if self.drop is None: @@ -125,7 +128,6 @@ def _compute_drop_idx(self): len(self.drop.keys()))) drop_idx = dict() for feature in self.drop.keys(): - cats = self._encoders[feature].classes_ self.drop[feature] = Series(self.drop[feature]) if len(self.drop[feature]) != 1: msg = ("Trying to drop multiple values for feature {}, " @@ -135,6 +137,7 @@ def _compute_drop_idx(self): # where present before one hot encoding if multiples # categories where dropped. raise ValueError(msg) + cats = self._encoders[feature].classes_ if not self.drop[feature].isin(cats).all(): msg = ("Some categories for feature {} were supposed " "to be dropped, but were not found in the encoder " @@ -202,8 +205,8 @@ def fit_transform(self, X): def _one_hot_encoding(self, feature, X): encoder = self._encoders[feature] - col_idx = encoder.transform(X).to_gpu_array(fillna="pandas") - col_idx = cp.asarray(col_idx) + col_idx = encoder.transform(X) + col_idx = cp.asarray(col_idx.to_gpu_array(fillna="pandas")) ohe = cp.zeros((len(X), len(encoder.classes_)), dtype=self.dtype) # Filter out rows with null values diff --git a/python/cuml/test/test_one_hot_encoder.py b/python/cuml/test/test_one_hot_encoder.py index 906fe2ef95..05258eac9f 100644 --- a/python/cuml/test/test_one_hot_encoder.py +++ b/python/cuml/test/test_one_hot_encoder.py @@ -122,7 +122,7 @@ def test_onehot_inverse_transform_handle_unknown(): @pytest.mark.parametrize('drop', [None, 'first']) -@pytest.mark.parametrize('sparse', [True, False]) +@pytest.mark.parametrize('sparse', [True, False], ids=['sparse', 'dense']) @pytest.mark.parametrize("n_samples", [10, 10000, 50000, stress_param(250000)]) def test_onehot_random_inputs(drop, sparse, n_samples): if sparse: From aeb1a5bdbcf90861f6607a1997c0c4f819c81005 Mon Sep 17 00:00:00 2001 From: Simon Andersen Date: Mon, 30 Mar 2020 16:24:21 -0500 Subject: [PATCH 071/330] Add 
get_categories_() utilities to check categories --- python/cuml/preprocessing/encoders.py | 12 ++++++++++++ python/cuml/test/test_one_hot_encoder.py | 11 +++++++++++ 2 files changed, 23 insertions(+) diff --git a/python/cuml/preprocessing/encoders.py b/python/cuml/preprocessing/encoders.py index 28c45eb393..7aeeb44fbe 100644 --- a/python/cuml/preprocessing/encoders.py +++ b/python/cuml/preprocessing/encoders.py @@ -86,6 +86,7 @@ def __init__(self, categories='auto', drop=None, sparse=False, self.drop = drop self._fitted = False self.drop_idx_ = None + self._features = None self._encoders = None if sparse: raise ValueError('Sparse matrix are not fully supported by cupy ' @@ -152,6 +153,15 @@ def _compute_drop_idx(self): "'first', None or a dict, got {}") raise ValueError(msg.format(type(self.drop))) + def get_categories_(self): + """ + Returns categories used for the one hot encoding in the correct order. + + This copies the categories to the CPU and should only be used to check + the order of the categories. + """ + return [self._encoders[f].classes_.to_array() for f in self._features] + def fit(self, X): """ Fit OneHotEncoder to X. 
@@ -165,12 +175,14 @@ def fit(self, X): """ self._validate_keywords() if type(self.categories) is str and self.categories == 'auto': + self._features = X.columns self._encoders = { feature: LabelEncoder(handle_unknown=self.handle_unknown).fit( X[feature]) for feature in X.columns } else: + self._features = self.categories.columns self._encoders = dict() for feature in self.categories.columns: le = LabelEncoder(handle_unknown=self.handle_unknown) diff --git a/python/cuml/test/test_one_hot_encoder.py b/python/cuml/test/test_one_hot_encoder.py index 05258eac9f..5a6c710879 100644 --- a/python/cuml/test/test_one_hot_encoder.py +++ b/python/cuml/test/test_one_hot_encoder.py @@ -181,3 +181,14 @@ def test_onehot_drop_exceptions(drop, pattern): with pytest.raises(ValueError, match=pattern): OneHotEncoder(sparse=False, drop=drop).fit(X) + + +def test_onehot_get_categories(): + X = DataFrame({'chars': ['c', 'b', 'd'], 'ints': [2, 1, 0]}) + + ref = [np.array(['b', 'c', 'd']), np.array([0, 1, 2])] + enc = OneHotEncoder().fit(X) + cats = enc.get_categories_() + + for i in range(len(ref)): + np.testing.assert_array_equal(ref[i], cats[i]) From db7a5f4ab4aa222c6fd3910cf3c2a56797c24e3c Mon Sep 17 00:00:00 2001 From: Simon Andersen Date: Mon, 30 Mar 2020 16:56:03 -0500 Subject: [PATCH 072/330] drop='first' now uses int instead of cupy for readability --- python/cuml/preprocessing/encoders.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/python/cuml/preprocessing/encoders.py b/python/cuml/preprocessing/encoders.py index 7aeeb44fbe..222db7b03b 100644 --- a/python/cuml/preprocessing/encoders.py +++ b/python/cuml/preprocessing/encoders.py @@ -119,8 +119,7 @@ def _compute_drop_idx(self): if self.drop is None: return None elif isinstance(self.drop, str) and self.drop == 'first': - return {feature: cp.array([0]) - for feature in self._encoders.keys()} + return {feature: 0 for feature in self._encoders.keys()} elif isinstance(self.drop, dict): if len(self.drop.keys()) 
!= len(self._encoders): msg = ("`drop` should have as many columns as the number " From e5607c523ae586a8882f9325491db258eee7c563 Mon Sep 17 00:00:00 2001 From: salonijain27 Date: Wed, 1 Apr 2020 17:58:24 -0500 Subject: [PATCH 073/330] updated dask rf class to have predict_proba function --- python/cuml/dask/common/base.py | 11 ++++++++++ .../dask/ensemble/randomforestclassifier.py | 21 +++++++++++++++++-- python/cuml/test/dask/test_random_forest.py | 20 +++++++++++++++--- 3 files changed, 47 insertions(+), 5 deletions(-) diff --git a/python/cuml/dask/common/base.py b/python/cuml/dask/common/base.py index 415dfc4526..b0ff04c6fa 100644 --- a/python/cuml/dask/common/base.py +++ b/python/cuml/dask/common/base.py @@ -187,6 +187,13 @@ def _run_parallel_func(self, return output if delayed else output.persist() +class DelayedPredictionProbaMixin(DelayedParallelFunc): + + def _predict_proba(self, X, delayed=True, **kwargs): + return self._run_parallel_func(_predict_proba_func, X, 2, delayed, + **kwargs) + + class DelayedPredictionMixin(DelayedParallelFunc): def _predict(self, X, delayed=True, **kwargs): @@ -232,6 +239,10 @@ def _predict_func(model, data, **kwargs): return model.predict(data, **kwargs) +def _predict_proba_func(model, data, **kwargs): + return model.predict_proba(data, **kwargs) + + def _transform_func(model, data, **kwargs): return model.transform(data, **kwargs) diff --git a/python/cuml/dask/ensemble/randomforestclassifier.py b/python/cuml/dask/ensemble/randomforestclassifier.py index cbdaa18e6e..4f9cd20b6e 100755 --- a/python/cuml/dask/ensemble/randomforestclassifier.py +++ b/python/cuml/dask/ensemble/randomforestclassifier.py @@ -22,7 +22,8 @@ from dask.distributed import default_client, wait -from cuml.dask.common.base import DelayedPredictionMixin +from cuml.dask.common.base import DelayedPredictionMixin, \ + DelayedPredictionProbaMixin from cuml.dask.common.input_utils import DistributedDataHandler import math @@ -30,7 +31,8 @@ from uuid import uuid1 
-class RandomForestClassifier(DelayedPredictionMixin): +class RandomForestClassifier(DelayedPredictionMixin, + DelayedPredictionProbaMixin): """ Experimental API implementing a multi-GPU Random Forest classifier @@ -592,6 +594,21 @@ def _predict_using_cpu(self, X, convert_dtype=True): pred.append(max_class) return pred + def predict_proba(self, X, output_class=True, algo='auto', + threshold=0.5, num_classes=2, + convert_dtype=False, + delayed=True, fil_sparse_format='auto'): + + self._concat_treelite_models() + data = DistributedDataHandler.single(X, client=self.client) + self.datatype = data.datatype + + kwargs = {"output_class": output_class, "convert_dtype": convert_dtype, + "threshold": threshold, + "num_classes": num_classes, "algo": algo, + "fil_sparse_format": fil_sparse_format} + return self._predict_proba(X, delayed, **kwargs) + def get_params(self, deep=True): """ Returns the value of all parameters diff --git a/python/cuml/test/dask/test_random_forest.py b/python/cuml/test/dask/test_random_forest.py index ad09a9bdd9..032bbcabb2 100644 --- a/python/cuml/test/dask/test_random_forest.py +++ b/python/cuml/test/dask/test_random_forest.py @@ -46,7 +46,9 @@ from dask.array import from_array from sklearn.datasets import make_regression, make_classification from sklearn.model_selection import train_test_split -from sklearn.metrics import accuracy_score, r2_score +from sklearn.metrics import accuracy_score, r2_score, mean_squared_error +from sklearn.ensemble import RandomForestClassifier as skrfc + from dask.distributed import Client @@ -139,7 +141,6 @@ def test_rf_classification_dask_cudf(partitions_per_worker, cluster): cu_rf_mg = cuRFC_mg(**cu_rf_params) cu_rf_mg.fit(X_train_df, y_train_df) cu_rf_mg_predict = cu_rf_mg.predict(X_test_cudf) - acc_score = accuracy_score(cu_rf_mg_predict, y_test, normalize=True) assert acc_score > 0.8 @@ -243,9 +244,22 @@ def test_rf_classification_dask_fil(partitions_per_worker, cluster, if not output_class: cu_rf_mg_predict = 
np.round(cu_rf_mg_predict) + fil_preds_proba = cu_rf_mg.predict_proba(X_test_df).compute() + fil_preds_proba = cp.asnumpy(fil_preds_proba.to_gpu_matrix()) + y_proba = np.zeros(np.shape(fil_preds_proba)) + y_proba[:, 1] = y_test + y_proba[:, 0] = 1.0 - y_test + fil_mse = mean_squared_error(y_proba, fil_preds_proba) + sk_model = skrfc(n_estimators=25, + max_depth=13, + random_state=10) + sk_model.fit(X_train, y_train) + sk_preds_proba = sk_model.predict_proba(X_test) + sk_mse = mean_squared_error(y_proba, sk_preds_proba) + acc_score = accuracy_score(cu_rf_mg_predict, y_test, normalize=True) - assert acc_score > 0.8 + assert acc_score > 0.8 and fil_mse <= (sk_mse + 0.071) finally: c.close() From a28a6362ebcf1444f4a546d90332c76125b8f2ee Mon Sep 17 00:00:00 2001 From: salonijain27 Date: Wed, 1 Apr 2020 23:38:40 -0500 Subject: [PATCH 074/330] update test and changelog files --- CHANGELOG.md | 1 + python/cuml/test/dask/test_random_forest.py | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f7858c24b8..6359a4b690 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -32,6 +32,7 @@ - PR #1738: cuml.dask refactor beginning and dask array input option for OLS, Ridge and KMeans - PR #1874: Add predict_proba function to RF classifier - PR #1815: Adding KNN parameter to UMAP +- PR #1978: Adding `predict_proba` function to dask RF ## Improvements - PR #1644: Add `predict_proba()` for FIL binary classifier diff --git a/python/cuml/test/dask/test_random_forest.py b/python/cuml/test/dask/test_random_forest.py index 032bbcabb2..136a87119f 100644 --- a/python/cuml/test/dask/test_random_forest.py +++ b/python/cuml/test/dask/test_random_forest.py @@ -259,7 +259,8 @@ def test_rf_classification_dask_fil(partitions_per_worker, cluster, acc_score = accuracy_score(cu_rf_mg_predict, y_test, normalize=True) - assert acc_score > 0.8 and fil_mse <= (sk_mse + 0.071) + assert acc_score > 0.8 + assert fil_mse <= (sk_mse + 0.012) finally: c.close() From 
81df9df9a7f0554f3dad1824c912b995bb972e8a Mon Sep 17 00:00:00 2001 From: Simon Andersen Date: Thu, 2 Apr 2020 11:47:56 -0500 Subject: [PATCH 075/330] Fix docstring for drop parameter --- python/cuml/preprocessing/encoders.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cuml/preprocessing/encoders.py b/python/cuml/preprocessing/encoders.py index 222db7b03b..48ee76f6ab 100644 --- a/python/cuml/preprocessing/encoders.py +++ b/python/cuml/preprocessing/encoders.py @@ -45,7 +45,7 @@ class OneHotEncoder: - 'auto' : Determine categories automatically from the training data. - DataFrame : ``categories[col]`` holds the categories expected in the feature col. - drop : 'first' or a cudf.DataFrame, default=None + drop : 'first', None or a dict, default=None Specifies a methodology to use to drop one of the categories per feature. This is useful in situations where perfectly collinear features cause problems, such as when feeding the resulting data From b920ffdf6e3fdcb2fdf9fa65c502bc630f486d28 Mon Sep 17 00:00:00 2001 From: salonijain27 Date: Thu, 2 Apr 2020 12:48:01 -0500 Subject: [PATCH 076/330] update docs for predict_proba --- .../dask/ensemble/randomforestclassifier.py | 54 +++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/python/cuml/dask/ensemble/randomforestclassifier.py b/python/cuml/dask/ensemble/randomforestclassifier.py index 4f9cd20b6e..e628f6920e 100755 --- a/python/cuml/dask/ensemble/randomforestclassifier.py +++ b/python/cuml/dask/ensemble/randomforestclassifier.py @@ -598,7 +598,61 @@ def predict_proba(self, X, output_class=True, algo='auto', threshold=0.5, num_classes=2, convert_dtype=False, delayed=True, fil_sparse_format='auto'): + """ + Predicts the probability of each class for X. + + Parameters + ---------- + X : Dask cuDF dataframe or CuPy backed Dask Array (n_rows, n_features) + Distributed dense matrix (floats or doubles) of shape + (n_samples, n_features). 
+ predict_model : String (default = 'GPU') + 'GPU' to predict using the GPU, 'CPU' otherwise. The 'GPU' can only + be used if the model was trained on float32 data and `X` is float32 + or convert_dtype is set to True. Also the 'GPU' should only be + used for binary classification problems. + output_class : boolean (default = True) + This is optional and required only while performing the + predict operation on the GPU. + If true, return a 1 or 0 depending on whether the raw + prediction exceeds the threshold. If False, just return + the raw prediction. + algo : string (default = 'auto') + This is optional and required only while performing the + predict operation on the GPU. + 'naive' - simple inference using shared memory + 'tree_reorg' - similar to naive but trees rearranged to be more + coalescing-friendly + 'batch_tree_reorg' - similar to tree_reorg but predicting + multiple rows per thread block + `auto` - choose the algorithm automatically. Currently + 'batch_tree_reorg' is used for dense storage + and 'naive' for sparse storage + threshold : float (default = 0.5) + Threshold used for classification. Optional and required only + while performing the predict operation on the GPU. + It is applied if output_class == True, else it is ignored + num_classes : int (default = 2) + number of different classes present in the dataset + convert_dtype : bool, optional (default = True) + When set to True, the predict method will, when necessary, convert + the input to the data type which was used to train the model. This + will increase memory used for the method. + fil_sparse_format : boolean or string (default = auto) + This variable is used to choose the type of forest that will be + created in the Forest Inference Library. It is not required + while using predict_model='CPU'. 
+ 'auto' - choose the storage type automatically + (currently True is chosen by auto) + False - create a dense forest + True - create a sparse forest, requires algo='naive' + or algo='auto' + Returns + ---------- + y : NumPy + Dask cuDF dataframe or CuPy backed Dask Array (n_rows, n_classes) + """ self._concat_treelite_models() data = DistributedDataHandler.single(X, client=self.client) self.datatype = data.datatype From 6178ea393f758213e15d5d4a696e087096fc3dc5 Mon Sep 17 00:00:00 2001 From: divyegala Date: Fri, 3 Apr 2020 00:05:46 -0500 Subject: [PATCH 077/330] cuml array in all SG neighbors --- .../cuml/neighbors/kneighbors_classifier.pyx | 76 ++++++++----------- .../cuml/neighbors/kneighbors_regressor.pyx | 47 +++++------- python/cuml/neighbors/nearest_neighbors.pyx | 52 +++++-------- .../cuml/test/test_kneighbors_classifier.py | 10 ++- 4 files changed, 72 insertions(+), 113 deletions(-) diff --git a/python/cuml/neighbors/kneighbors_classifier.pyx b/python/cuml/neighbors/kneighbors_classifier.pyx index 7b25cd2829..c831723d21 100644 --- a/python/cuml/neighbors/kneighbors_classifier.pyx +++ b/python/cuml/neighbors/kneighbors_classifier.pyx @@ -21,10 +21,11 @@ from cuml.neighbors.nearest_neighbors import NearestNeighbors -from cuml.utils import get_cudf_column_ptr, get_dev_array_ptr, \ - input_to_dev_array, zeros, row_matrix +from cuml.common.array import CumlArray +from cuml.utils import input_to_cuml_array import numpy as np +import cupy as cp from cuml.metrics import accuracy_score @@ -151,21 +152,14 @@ class KNeighborsClassifier(NearestNeighbors): # Only need to store index if fit() was called if self.n_indices == 1: - state['y'] = cudf.Series(self.y) - state['X_m'] = cudf.DataFrame.from_gpu_matrix(self.X_m) + state['y'] = self.y + state['X_m'] = self.X_m return state def __setstate__(self, state): super(NearestNeighbors, self).__init__(handle=None, verbose=state['verbose']) - cdef uintptr_t x_ctype - - # Only need to recover state if model had been previously 
fit - if state["n_indices"] == 1: - - state['y'] = state['y'].to_gpu_array() - state['X_m'] = state['X_m'].as_gpu_matrix() self.__dict__.update(state) def fit(self, X, y, convert_dtype=True): @@ -189,8 +183,8 @@ class KNeighborsClassifier(NearestNeighbors): convert the inputs to np.float32. """ super(KNeighborsClassifier, self).fit(X, convert_dtype) - self.y, _, _, _, _ = \ - input_to_dev_array(y, order='F', check_dtype=np.int32, + self.y, _, _, _ = \ + input_to_cuml_array(y, order='F', check_dtype=np.int32, convert_to_dtype=(np.int32 if convert_dtype else None)) @@ -212,24 +206,26 @@ class KNeighborsClassifier(NearestNeighbors): When set to True, the fit method will automatically convert the inputs to np.float32. """ + + out_type = self._get_output_type(X) + knn_indices = self.kneighbors(X, return_distance=False, convert_dtype=convert_dtype) - cdef uintptr_t inds_ctype - - inds, inds_ctype, n_rows, _, _ = \ - input_to_dev_array(knn_indices, order='C', check_dtype=np.int64, + inds, n_rows, _, _ = \ + input_to_cuml_array(knn_indices, order='C', check_dtype=np.int64, convert_to_dtype=(np.int64 if convert_dtype else None)) + cdef uintptr_t inds_ctype = inds.ptr out_cols = self.y.shape[1] if len(self.y.shape) == 2 else 1 out_shape = (n_rows, out_cols) if out_cols > 1 else n_rows - classes = rmm.to_device(zeros(out_shape, + classes = CumlArray.zeros(out_shape, dtype=np.int32, - order="C")) + order="C") cdef vector[int*] *y_vec = new vector[int*]() @@ -238,10 +234,10 @@ class KNeighborsClassifier(NearestNeighbors): cdef uintptr_t y_ptr for i in range(out_cols): col = self.y[:, i] if out_cols > 1 else self.y - y_ptr = get_dev_array_ptr(col) + y_ptr = col.ptr y_vec.push_back(y_ptr) - cdef uintptr_t classes_ptr = get_dev_array_ptr(classes) + cdef uintptr_t classes_ptr = classes.ptr cdef cumlHandle* handle_ = self.handle.getHandle() @@ -255,14 +251,8 @@ class KNeighborsClassifier(NearestNeighbors): ) self.handle.sync() - if isinstance(X, np.ndarray): - return 
np.array(classes, dtype=np.int32) - elif isinstance(X, cudf.DataFrame): - if classes.ndim == 1: - classes = classes.reshape(classes.shape[0], 1) - return cudf.DataFrame.from_gpu_matrix(classes) - else: - return classes + + return classes.to_output(out_type) def predict_proba(self, X, convert_dtype=True): """ @@ -279,17 +269,19 @@ class KNeighborsClassifier(NearestNeighbors): When set to True, the fit method will automatically convert the inputs to np.float32. """ + + out_type = self._get_output_type(X) + knn_indices = self.kneighbors(X, return_distance=False, convert_dtype=convert_dtype) - cdef uintptr_t inds_ctype - - inds, inds_ctype, n_rows, n_cols, dtype = \ - input_to_dev_array(knn_indices, order='C', + inds, n_rows, n_cols, dtype = \ + input_to_cuml_array(knn_indices, order='C', check_dtype=np.int64, convert_to_dtype=(np.int64 if convert_dtype else None)) + cdef uintptr_t inds_ctype = inds.ptr out_cols = self.y.shape[1] if len(self.y.shape) == 2 else 1 @@ -301,15 +293,15 @@ class KNeighborsClassifier(NearestNeighbors): cdef uintptr_t y_ptr for out_col in range(out_cols): col = self.y[:, out_col] if out_cols > 1 else self.y - classes = rmm.to_device(zeros((n_rows, - len(np.unique(np.asarray(col)))), + classes = CumlArray.zeros((n_rows, + len(cp.unique(cp.asarray(col)))), dtype=np.float32, - order="C")) + order="C") out_classes.append(classes) - classes_ptr = get_dev_array_ptr(classes) + classes_ptr = classes.ptr out_vec.push_back(classes_ptr) - y_ptr = get_dev_array_ptr(col) + y_ptr = col.ptr y_vec.push_back(y_ptr) cdef cumlHandle* handle_ = self.handle.getHandle() @@ -327,13 +319,7 @@ class KNeighborsClassifier(NearestNeighbors): final_classes = [] for out_class in out_classes: - if isinstance(X, np.ndarray): - final_class = np.array(out_class, dtype=np.int32) - elif isinstance(X, cudf.DataFrame): - final_class = cudf.DataFrame.from_gpu_matrix(out_class) - else: - final_class = out_class - final_classes.append(final_class) + 
final_classes.append(out_class.to_output(out_type)) return final_classes[0] \ if len(final_classes) == 1 else tuple(final_classes) diff --git a/python/cuml/neighbors/kneighbors_regressor.pyx b/python/cuml/neighbors/kneighbors_regressor.pyx index fb57e0e9e2..8d6aba3a00 100644 --- a/python/cuml/neighbors/kneighbors_regressor.pyx +++ b/python/cuml/neighbors/kneighbors_regressor.pyx @@ -21,8 +21,8 @@ from cuml.neighbors.nearest_neighbors import NearestNeighbors -from cuml.utils import get_cudf_column_ptr, get_dev_array_ptr, \ - input_to_dev_array, zeros, row_matrix +from cuml.common.array import CumlArray +from cuml.utils import input_to_cuml_array from cuml.metrics import r2_score @@ -158,21 +158,14 @@ class KNeighborsRegressor(NearestNeighbors): # Only need to store index if fit() was called if self.n_indices == 1: - state['y'] = cudf.Series(self.y) - state['X_m'] = cudf.DataFrame.from_gpu_matrix(self.X_m) + state['y'] = self.y + state['X_m'] = self.X_m return state def __setstate__(self, state): super(NearestNeighbors, self).__init__(handle=None, verbose=state['verbose']) - cdef uintptr_t x_ctype - - # Only need to recover state if model had been previously fit - if state["n_indices"] == 1: - - state['y'] = state['y'].to_gpu_array() - state['X_m'] = state['X_m'].as_gpu_matrix() self.__dict__.update(state) def fit(self, X, y, convert_dtype=True): @@ -196,8 +189,8 @@ class KNeighborsRegressor(NearestNeighbors): convert the inputs to np.float32. """ super(KNeighborsRegressor, self).fit(X, convert_dtype=convert_dtype) - self.y, _, _, _, _ = \ - input_to_dev_array(y, order='F', check_dtype=np.float32, + self.y, _, _, _ = \ + input_to_cuml_array(y, order='F', check_dtype=np.float32, convert_to_dtype=(np.float32 if convert_dtype else None)) @@ -220,29 +213,31 @@ class KNeighborsRegressor(NearestNeighbors): When set to True, the fit method will automatically convert the inputs to np.float32. 
""" + + out_type = self._get_output_type(X) + knn_indices = self.kneighbors(X, return_distance=False, convert_dtype=convert_dtype) - cdef uintptr_t inds_ctype - - inds, inds_ctype, n_rows, n_cols, dtype = \ - input_to_dev_array(knn_indices, order='C', check_dtype=np.int64, + inds, n_rows, n_cols, dtype = \ + input_to_cuml_array(knn_indices, order='C', check_dtype=np.int64, convert_to_dtype=(np.int64 if convert_dtype else None)) + cdef uintptr_t inds_ctype = inds.ptr res_cols = 1 if len(self.y.shape) == 1 else self.y.shape[1] res_shape = n_rows if res_cols == 1 else (n_rows, res_cols) - results = rmm.to_device(zeros(res_shape, dtype=np.float32, - order="C")) + results = CumlArray.zeros(res_shape, dtype=np.float32, + order="C") - cdef uintptr_t results_ptr = get_dev_array_ptr(results) + cdef uintptr_t results_ptr = results.ptr cdef uintptr_t y_ptr cdef vector[float*] *y_vec = new vector[float*]() for col_num in range(res_cols): col = self.y if res_cols == 1 else self.y[:, col_num] - y_ptr = get_dev_array_ptr(col) + y_ptr = col.ptr y_vec.push_back(y_ptr) cdef cumlHandle* handle_ = self.handle.getHandle() @@ -257,14 +252,8 @@ class KNeighborsRegressor(NearestNeighbors): ) self.handle.sync() - if isinstance(X, np.ndarray): - return np.array(results) - elif isinstance(X, cudf.DataFrame): - if results.ndim == 1: - results = results.reshape(results.shape[0], 1) - return cudf.DataFrame.from_gpu_matrix(results) - else: - return results + + return results.to_output(out_type) def score(self, X, y, convert_dtype=True): """ diff --git a/python/cuml/neighbors/nearest_neighbors.pyx b/python/cuml/neighbors/nearest_neighbors.pyx index 3e903c745b..d0129f2624 100644 --- a/python/cuml/neighbors/nearest_neighbors.pyx +++ b/python/cuml/neighbors/nearest_neighbors.pyx @@ -27,8 +27,8 @@ import cuml import warnings from cuml.common.base import Base -from cuml.utils import get_cudf_column_ptr, get_dev_array_ptr, \ - input_to_dev_array, zeros, row_matrix +from cuml.common.array import 
CumlArray +from cuml.utils import input_to_cuml_array from cython.operator cimport dereference as deref @@ -190,7 +190,7 @@ class NearestNeighbors(Base): # Only need to store index if fit() was called if self.n_indices == 1: - state['X_m'] = cudf.DataFrame.from_gpu_matrix(self.X_m) + state['X_m'] = self.X_m return state @@ -198,12 +198,6 @@ class NearestNeighbors(Base): super(NearestNeighbors, self).__init__(handle=None, verbose=state['verbose']) - cdef uintptr_t x_ctype - # Only need to recover state if model had been previously fit - if state["n_indices"] == 1: - - state['X_m'] = state['X_m'].as_gpu_matrix() - self.__dict__.update(state) def fit(self, X, convert_dtype=True): @@ -227,8 +221,8 @@ class NearestNeighbors(Base): self.n_dims = X.shape[1] - self.X_m, X_ctype, n_rows, n_cols, dtype = \ - input_to_dev_array(X, order='F', check_dtype=np.float32, + self.X_m, n_rows, n_cols, dtype = \ + input_to_cuml_array(X, order='F', check_dtype=np.float32, convert_to_dtype=(np.float32 if convert_dtype else None)) @@ -273,6 +267,8 @@ class NearestNeighbors(Base): n_neighbors = self.n_neighbors if n_neighbors is None else n_neighbors X = self.X_m if X is None else X + out_type = self._get_output_type(X) + if (n_neighbors is None and self.n_neighbors is None) \ or n_neighbors <= 0: raise ValueError("k or n_neighbors must be a positive integers") @@ -289,31 +285,29 @@ class NearestNeighbors(Base): raise ValueError("Dimensions of X need to match dimensions of " "indices (%d)" % self.n_dims) - X_m, X_ctype, N, _, dtype = \ - input_to_dev_array(X, order='F', check_dtype=np.float32, + X_m, N, _, dtype = \ + input_to_cuml_array(X, order='F', check_dtype=np.float32, convert_to_dtype=(np.float32 if convert_dtype else False)) # Need to establish result matrices for indices (Nxk) # and for distances (Nxk) - I_ndarr = rmm.to_device(zeros(N*n_neighbors, dtype=np.int64, - order="C")) - D_ndarr = rmm.to_device(zeros(N*n_neighbors, dtype=np.float32, - order="C")) + I_ndarr = 
CumlArray.zeros((N, n_neighbors), dtype=np.int64, order="C") + D_ndarr = CumlArray.zeros((N, n_neighbors), dtype=np.float32, order="C") - cdef uintptr_t I_ptr = get_dev_array_ptr(I_ndarr) - cdef uintptr_t D_ptr = get_dev_array_ptr(D_ndarr) + cdef uintptr_t I_ptr = I_ndarr.ptr + cdef uintptr_t D_ptr = D_ndarr.ptr cdef vector[float*] *inputs = new vector[float*]() cdef vector[int] *sizes = new vector[int]() - cdef uintptr_t idx_ptr = get_dev_array_ptr(self.X_m) + cdef uintptr_t idx_ptr = self.X_m.ptr inputs.push_back(idx_ptr) sizes.push_back(self.X_m.shape[0]) cdef cumlHandle* handle_ = self.handle.getHandle() - cdef uintptr_t x_ctype_st = X_ctype + cdef uintptr_t x_ctype_st = X_m.ptr brute_force_knn( handle_[0], @@ -329,22 +323,10 @@ class NearestNeighbors(Base): False ) - I_ndarr = I_ndarr.reshape((N, n_neighbors)) - D_ndarr = D_ndarr.reshape((N, n_neighbors)) - - if isinstance(X, cudf.DataFrame): - inds = cudf.DataFrame.from_gpu_matrix(I_ndarr) - dists = cudf.DataFrame.from_gpu_matrix(D_ndarr) - - elif isinstance(X, np.ndarray): - inds = np.asarray(I_ndarr) - dists = np.asarray(D_ndarr) - - del I_ndarr - del D_ndarr del X_m del inputs del sizes - return (dists, inds) if return_distance else inds + return (D_ndarr.to_output(out_type), I_ndarr.to_output(out_type)) \ + if return_distance else I_ndarr.to_output(out_type) diff --git a/python/cuml/test/test_kneighbors_classifier.py b/python/cuml/test/test_kneighbors_classifier.py index eedf8be115..b0417cf4b3 100644 --- a/python/cuml/test/test_kneighbors_classifier.py +++ b/python/cuml/test/test_kneighbors_classifier.py @@ -57,11 +57,13 @@ def test_neighborhood_predictions(nrows, ncols, n_neighbors, predictions = knn_cu.predict(X) if datatype == "dataframe": - assert isinstance(predictions, cudf.DataFrame) + assert isinstance(predictions, cudf.Series) + assert array_equal(predictions.to_frame().astype(np.int32), + y.astype(np.int32)) else: assert isinstance(predictions, np.ndarray) - - assert 
array_equal(predictions.astype(np.int32), y.astype(np.int32)) + assert array_equal(predictions.astype(np.int32), + y.astype(np.int32)) @pytest.mark.parametrize("datatype", ["dataframe", "numpy"]) @@ -154,7 +156,7 @@ def test_predict_non_gaussian(n_samples, n_features, n_neighbors, n_query): cuml_result = knn_cuml.predict(X_device_test) assert np.array_equal( - np.asarray(cuml_result.as_gpu_matrix())[:, 0], sk_result) + np.asarray(cuml_result.to_gpu_array()), sk_result) def test_nonmonotonic_labels(): From 38f0a446d3ef5ea4fa286c712dd5bff0bc24e78d Mon Sep 17 00:00:00 2001 From: divyegala Date: Fri, 3 Apr 2020 00:10:07 -0500 Subject: [PATCH 078/330] changelog and style fixes --- CHANGELOG.md | 1 + python/cuml/test/test_kneighbors_classifier.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index aae548b60d..511fbfe9e2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ - PR #1947: Cleaning up cmake - PR #1927: Use Cython's `new_build_ext` (if available) - PR #1946: Removed zlib dependency from cmake +- PR #1981: CumlArray and DistributedDataHandler refactor in NearestNeighbors ## Bug Fixes - PR #1939: Fix syntax error in cuml.common.array diff --git a/python/cuml/test/test_kneighbors_classifier.py b/python/cuml/test/test_kneighbors_classifier.py index b0417cf4b3..2137ec6fd0 100644 --- a/python/cuml/test/test_kneighbors_classifier.py +++ b/python/cuml/test/test_kneighbors_classifier.py @@ -59,11 +59,11 @@ def test_neighborhood_predictions(nrows, ncols, n_neighbors, if datatype == "dataframe": assert isinstance(predictions, cudf.Series) assert array_equal(predictions.to_frame().astype(np.int32), - y.astype(np.int32)) + y.astype(np.int32)) else: assert isinstance(predictions, np.ndarray) assert array_equal(predictions.astype(np.int32), - y.astype(np.int32)) + y.astype(np.int32)) @pytest.mark.parametrize("datatype", ["dataframe", "numpy"]) From da98622e50f55f6dc6ed86dab9f15cd11bb7b0b7 Mon Sep 17 00:00:00 2001 
From: divyegala Date: Fri, 3 Apr 2020 00:24:10 -0500 Subject: [PATCH 079/330] freeing some memory and making style checker happy --- .../cuml/neighbors/kneighbors_classifier.pyx | 38 ++++++++++--------- .../cuml/neighbors/kneighbors_regressor.pyx | 19 ++++++---- python/cuml/neighbors/nearest_neighbors.pyx | 15 ++++---- 3 files changed, 40 insertions(+), 32 deletions(-) diff --git a/python/cuml/neighbors/kneighbors_classifier.pyx b/python/cuml/neighbors/kneighbors_classifier.pyx index c831723d21..22883ebc7c 100644 --- a/python/cuml/neighbors/kneighbors_classifier.pyx +++ b/python/cuml/neighbors/kneighbors_classifier.pyx @@ -185,9 +185,9 @@ class KNeighborsClassifier(NearestNeighbors): super(KNeighborsClassifier, self).fit(X, convert_dtype) self.y, _, _, _ = \ input_to_cuml_array(y, order='F', check_dtype=np.int32, - convert_to_dtype=(np.int32 - if convert_dtype - else None)) + convert_to_dtype=(np.int32 + if convert_dtype + else None)) self.handle.sync() @@ -214,18 +214,16 @@ class KNeighborsClassifier(NearestNeighbors): inds, n_rows, _, _ = \ input_to_cuml_array(knn_indices, order='C', check_dtype=np.int64, - convert_to_dtype=(np.int64 - if convert_dtype - else None)) + convert_to_dtype=(np.int64 + if convert_dtype + else None)) cdef uintptr_t inds_ctype = inds.ptr out_cols = self.y.shape[1] if len(self.y.shape) == 2 else 1 out_shape = (n_rows, out_cols) if out_cols > 1 else n_rows - classes = CumlArray.zeros(out_shape, - dtype=np.int32, - order="C") + classes = CumlArray.zeros(out_shape, dtype=np.int32, order="C") cdef vector[int*] *y_vec = new vector[int*]() @@ -251,7 +249,10 @@ class KNeighborsClassifier(NearestNeighbors): ) self.handle.sync() - + + del knn_indices + del inds + return classes.to_output(out_type) def predict_proba(self, X, convert_dtype=True): @@ -277,10 +278,10 @@ class KNeighborsClassifier(NearestNeighbors): inds, n_rows, n_cols, dtype = \ input_to_cuml_array(knn_indices, order='C', - check_dtype=np.int64, - convert_to_dtype=(np.int64 - if 
convert_dtype - else None)) + check_dtype=np.int64, + convert_to_dtype=(np.int64 + if convert_dtype + else None)) cdef uintptr_t inds_ctype = inds.ptr out_cols = self.y.shape[1] if len(self.y.shape) == 2 else 1 @@ -294,9 +295,9 @@ class KNeighborsClassifier(NearestNeighbors): for out_col in range(out_cols): col = self.y[:, out_col] if out_cols > 1 else self.y classes = CumlArray.zeros((n_rows, - len(cp.unique(cp.asarray(col)))), - dtype=np.float32, - order="C") + len(cp.unique(cp.asarray(col)))), + dtype=np.float32, + order="C") out_classes.append(classes) classes_ptr = classes.ptr out_vec.push_back(classes_ptr) @@ -317,6 +318,9 @@ class KNeighborsClassifier(NearestNeighbors): self.handle.sync() + del knn_indices + del inds + final_classes = [] for out_class in out_classes: final_classes.append(out_class.to_output(out_type)) diff --git a/python/cuml/neighbors/kneighbors_regressor.pyx b/python/cuml/neighbors/kneighbors_regressor.pyx index 8d6aba3a00..910e07c3e6 100644 --- a/python/cuml/neighbors/kneighbors_regressor.pyx +++ b/python/cuml/neighbors/kneighbors_regressor.pyx @@ -191,9 +191,9 @@ class KNeighborsRegressor(NearestNeighbors): super(KNeighborsRegressor, self).fit(X, convert_dtype=convert_dtype) self.y, _, _, _ = \ input_to_cuml_array(y, order='F', check_dtype=np.float32, - convert_to_dtype=(np.float32 - if convert_dtype - else None)) + convert_to_dtype=(np.float32 + if convert_dtype + else None)) self.handle.sync() @@ -221,15 +221,15 @@ class KNeighborsRegressor(NearestNeighbors): inds, n_rows, n_cols, dtype = \ input_to_cuml_array(knn_indices, order='C', check_dtype=np.int64, - convert_to_dtype=(np.int64 - if convert_dtype - else None)) + convert_to_dtype=(np.int64 + if convert_dtype + else None)) cdef uintptr_t inds_ctype = inds.ptr res_cols = 1 if len(self.y.shape) == 1 else self.y.shape[1] res_shape = n_rows if res_cols == 1 else (n_rows, res_cols) results = CumlArray.zeros(res_shape, dtype=np.float32, - order="C") + order="C") cdef uintptr_t 
results_ptr = results.ptr cdef uintptr_t y_ptr @@ -252,7 +252,10 @@ class KNeighborsRegressor(NearestNeighbors): ) self.handle.sync() - + + del knn_indices + del inds + return results.to_output(out_type) def score(self, X, y, convert_dtype=True): diff --git a/python/cuml/neighbors/nearest_neighbors.pyx b/python/cuml/neighbors/nearest_neighbors.pyx index d0129f2624..b78572f6f6 100644 --- a/python/cuml/neighbors/nearest_neighbors.pyx +++ b/python/cuml/neighbors/nearest_neighbors.pyx @@ -223,9 +223,9 @@ class NearestNeighbors(Base): self.X_m, n_rows, n_cols, dtype = \ input_to_cuml_array(X, order='F', check_dtype=np.float32, - convert_to_dtype=(np.float32 - if convert_dtype - else None)) + convert_to_dtype=(np.float32 + if convert_dtype + else None)) self.n_indices = 1 @@ -287,13 +287,14 @@ class NearestNeighbors(Base): X_m, N, _, dtype = \ input_to_cuml_array(X, order='F', check_dtype=np.float32, - convert_to_dtype=(np.float32 if convert_dtype - else False)) + convert_to_dtype=(np.float32 if convert_dtype + else False)) # Need to establish result matrices for indices (Nxk) # and for distances (Nxk) I_ndarr = CumlArray.zeros((N, n_neighbors), dtype=np.int64, order="C") - D_ndarr = CumlArray.zeros((N, n_neighbors), dtype=np.float32, order="C") + D_ndarr = CumlArray.zeros((N, n_neighbors), + dtype=np.float32, order="C") cdef uintptr_t I_ptr = I_ndarr.ptr cdef uintptr_t D_ptr = D_ndarr.ptr @@ -329,4 +330,4 @@ class NearestNeighbors(Base): del sizes return (D_ndarr.to_output(out_type), I_ndarr.to_output(out_type)) \ - if return_distance else I_ndarr.to_output(out_type) + if return_distance else I_ndarr.to_output(out_type) From 268ce48b42ce715e32ea79a8e7d56ed3c94deb73 Mon Sep 17 00:00:00 2001 From: divyegala Date: Fri, 3 Apr 2020 01:49:31 -0500 Subject: [PATCH 080/330] cuml array for mg knn --- .../cuml/dask/neighbors/nearest_neighbors.py | 11 ++++-- .../cuml/neighbors/nearest_neighbors_mg.pyx | 39 ++++++++++--------- 2 files changed, 29 insertions(+), 21 deletions(-) diff 
--git a/python/cuml/dask/neighbors/nearest_neighbors.py b/python/cuml/dask/neighbors/nearest_neighbors.py index a6a9835adf..16ce63c556 100644 --- a/python/cuml/dask/neighbors/nearest_neighbors.py +++ b/python/cuml/dask/neighbors/nearest_neighbors.py @@ -20,6 +20,7 @@ from dask.distributed import default_client from cuml.dask.common.comms import worker_state, CommsContext from dask.distributed import wait +from cuml.dask.common.input_utils import to_output from uuid import uuid1 @@ -80,6 +81,7 @@ def _func_kneighbors(model, local_idx_parts, idx_m, n, idx_parts_to_ranks, local_query_parts, query_m, query_parts_to_ranks, rank, k): + print(local_query_parts) return model.kneighbors( local_idx_parts, idx_m, n, idx_parts_to_ranks, local_query_parts, query_m, query_parts_to_ranks, @@ -153,6 +155,7 @@ def _query_models(self, n_neighbors, index_worker_to_parts = workers_to_parts(index_futures) query_worker_to_parts = workers_to_parts(query_futures) + print(query_worker_to_parts) """ Build inputs and outputs @@ -165,6 +168,7 @@ def _query_models(self, n_neighbors, worker_info, query_futures) + print(query_parts_to_ranks) """ Invoke kneighbors on Dask workers to perform distributed query """ @@ -262,8 +266,9 @@ def kneighbors(self, X=None, n_neighbors=None, return_distance=True, ret = nn_fit, out_i_futures if not return_distance else \ (nn_fit, out_d_futures, out_i_futures) else: - ret = to_dask_cudf(out_i_futures) \ - if not return_distance else (to_dask_cudf(out_d_futures), - to_dask_cudf(out_i_futures)) + # TODO: Remove hard-coded dtypes once DataDistributedHandler is used + ret = to_output(out_i_futures, 'float32') \ + if not return_distance else (to_output(out_d_futures, 'float32'), + to_output(out_i_futures, 'float32')) return ret diff --git a/python/cuml/neighbors/nearest_neighbors_mg.pyx b/python/cuml/neighbors/nearest_neighbors_mg.pyx index abaf0d4bc0..e592e90ad0 100644 --- a/python/cuml/neighbors/nearest_neighbors_mg.pyx +++ 
b/python/cuml/neighbors/nearest_neighbors_mg.pyx @@ -29,8 +29,8 @@ import cuml import warnings from cuml.common.base import Base -from cuml.utils import get_cudf_column_ptr, get_dev_array_ptr, \ - input_to_dev_array, zeros, row_matrix +from cuml.common.array import CumlArray +from cuml.utils import input_to_cuml_array from cython.operator cimport dereference as deref @@ -192,12 +192,13 @@ def _build_part_inputs(cuda_arr_ifaces, arr_ints = [] for arr in cuda_arr_ifaces: - X_m, input_ptr, n_rows, n_cols, dtype = \ - input_to_dev_array(arr, order="F", - convert_to_dtype=(np.float32 - if convert_dtype - else None), - check_dtype=[np.float32]) + X_m, n_rows, n_cols, dtype = \ + input_to_cuml_array(arr, order="F", + convert_to_dtype=(np.float32 + if convert_dtype + else None), + check_dtype=[np.float32]) + input_ptr = X_m.ptr arr_ints.append({"obj": X_m, "data": input_ptr, "shape": (n_rows, n_cols)}) @@ -267,6 +268,8 @@ class NearestNeighborsMG(NearestNeighbors): output indices, output distances """ + out_type = self._get_output_type(queries[0]) + n_neighbors = self.n_neighbors if n_neighbors is None else n_neighbors self.n_dims = n @@ -295,18 +298,18 @@ class NearestNeighborsMG(NearestNeighbors): for query_part in q_cai: n_rows = query_part["shape"][0] - i_ary = rmm.to_device(zeros((n_rows, n_neighbors), - order="C", - dtype=np.int64)) - d_ary = rmm.to_device(zeros((n_rows, n_neighbors), - order="C", - dtype=np.float32)) + i_ary = CumlArray.zeros((n_rows, n_neighbors), + order="C", + dtype=np.int64) + d_ary = CumlArray.zeros((n_rows, n_neighbors), + order="C", + dtype=np.float32) output_i_arrs.append(i_ary) output_d_arrs.append(d_ary) - i_ptr = get_dev_array_ptr(i_ary) - d_ptr = get_dev_array_ptr(d_ary) + i_ptr = i_ary.ptr + d_ptr = d_ary.ptr out_i_vec.push_back(new int64Data_t( i_ptr, n_rows * n_neighbors)) @@ -331,9 +334,9 @@ class NearestNeighborsMG(NearestNeighbors): self.handle.sync() - output_i = list(map(lambda x: cudf.DataFrame.from_gpu_matrix(x), + output_i = 
list(map(lambda x: x.to_output(out_type), output_i_arrs)) - output_d = list(map(lambda x: cudf.DataFrame.from_gpu_matrix(x), + output_d = list(map(lambda x: x.to_output(out_type), output_d_arrs)) _free_mem(idx_rsp, From 84343b251cf67f252c15fdb5a10f02ca41773a00 Mon Sep 17 00:00:00 2001 From: divyegala Date: Fri, 3 Apr 2020 01:55:34 -0500 Subject: [PATCH 081/330] got too excited and didn't check style --- python/cuml/dask/neighbors/nearest_neighbors.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/python/cuml/dask/neighbors/nearest_neighbors.py b/python/cuml/dask/neighbors/nearest_neighbors.py index 16ce63c556..2e89820e2e 100644 --- a/python/cuml/dask/neighbors/nearest_neighbors.py +++ b/python/cuml/dask/neighbors/nearest_neighbors.py @@ -13,7 +13,7 @@ # limitations under the License. # -from cuml.dask.common import to_dask_cudf, extract_ddf_partitions, \ +from cuml.dask.common import extract_ddf_partitions, \ workers_to_parts, parts_to_ranks, raise_exception_from_futures, \ flatten_grouped_results, raise_mg_import_exception @@ -266,9 +266,10 @@ def kneighbors(self, X=None, n_neighbors=None, return_distance=True, ret = nn_fit, out_i_futures if not return_distance else \ (nn_fit, out_d_futures, out_i_futures) else: - # TODO: Remove hard-coded dtypes once DataDistributedHandler is used + # TODO: Remove hard-code once DataDistributedHandler is used ret = to_output(out_i_futures, 'float32') \ - if not return_distance else (to_output(out_d_futures, 'float32'), - to_output(out_i_futures, 'float32')) + if not return_distance else (to_output(out_d_futures, + 'float32'), to_output( + out_i_futures, 'float32')) return ret From 5c1a249c17fb8bc90b060e2483a9cc07377b5e26 Mon Sep 17 00:00:00 2001 From: divyegala Date: Fri, 3 Apr 2020 01:57:39 -0500 Subject: [PATCH 082/330] removing prints --- python/cuml/dask/neighbors/nearest_neighbors.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/python/cuml/dask/neighbors/nearest_neighbors.py 
b/python/cuml/dask/neighbors/nearest_neighbors.py index 2e89820e2e..06706c5321 100644 --- a/python/cuml/dask/neighbors/nearest_neighbors.py +++ b/python/cuml/dask/neighbors/nearest_neighbors.py @@ -81,7 +81,6 @@ def _func_kneighbors(model, local_idx_parts, idx_m, n, idx_parts_to_ranks, local_query_parts, query_m, query_parts_to_ranks, rank, k): - print(local_query_parts) return model.kneighbors( local_idx_parts, idx_m, n, idx_parts_to_ranks, local_query_parts, query_m, query_parts_to_ranks, @@ -155,7 +154,6 @@ def _query_models(self, n_neighbors, index_worker_to_parts = workers_to_parts(index_futures) query_worker_to_parts = workers_to_parts(query_futures) - print(query_worker_to_parts) """ Build inputs and outputs @@ -168,7 +166,6 @@ def _query_models(self, n_neighbors, worker_info, query_futures) - print(query_parts_to_ranks) """ Invoke kneighbors on Dask workers to perform distributed query """ From 3c98b05a5e358e279c17f6690ca89bd33b87d0e6 Mon Sep 17 00:00:00 2001 From: divyegala Date: Sat, 4 Apr 2020 02:57:26 -0500 Subject: [PATCH 083/330] knn dask --- python/cuml/dask/common/input_utils.py | 1 - .../cuml/dask/neighbors/nearest_neighbors.py | 65 ++++++++++--------- 2 files changed, 33 insertions(+), 33 deletions(-) diff --git a/python/cuml/dask/common/input_utils.py b/python/cuml/dask/common/input_utils.py index 0787c68e12..5d2d80d6e9 100644 --- a/python/cuml/dask/common/input_utils.py +++ b/python/cuml/dask/common/input_utils.py @@ -72,7 +72,6 @@ def __init__(self, gpu_futures=None, workers=None, self.total_rows = None self.ranks = None self.parts_to_sizes = None - self.total_rows = None @classmethod def get_client(cls, client=None): diff --git a/python/cuml/dask/neighbors/nearest_neighbors.py b/python/cuml/dask/neighbors/nearest_neighbors.py index 06706c5321..48b5c4c6d7 100644 --- a/python/cuml/dask/neighbors/nearest_neighbors.py +++ b/python/cuml/dask/neighbors/nearest_neighbors.py @@ -16,11 +16,13 @@ from cuml.dask.common import extract_ddf_partitions, \ 
workers_to_parts, parts_to_ranks, raise_exception_from_futures, \ flatten_grouped_results, raise_mg_import_exception +from cuml.dask.common.base import BaseEstimator from dask.distributed import default_client from cuml.dask.common.comms import worker_state, CommsContext from dask.distributed import wait from cuml.dask.common.input_utils import to_output +from cuml.dask.common.input_utils import DistributedDataHandler from uuid import uuid1 @@ -35,19 +37,17 @@ def _func_get_i(f, idx): return i[idx] -class NearestNeighbors(object): +class NearestNeighbors(BaseEstimator): """ Multi-node Multi-GPU NearestNeighbors Model. """ def __init__(self, client=None, streams_per_handle=0, verbose=False, **kwargs): - self.client = default_client() if client is None else client - self.model_args = kwargs - self.X = None - self.Y = None - self.n_cols = 0 + super(NearestNeighbors, self).__init__(client=client, + verbose=verbose, + **kwargs) + self.streams_per_handle = streams_per_handle - self.verbose = verbose def fit(self, X): """ @@ -61,7 +61,8 @@ def fit(self, X): ------- self: NearestNeighbors model """ - self.X = self.client.sync(extract_ddf_partitions, X) + self.X_handler = DistributedDataHandler.create(data=X, client=self.client) + self.datatype = self.X_handler.datatype self.n_cols = X.shape[1] return self @@ -88,12 +89,12 @@ def _func_kneighbors(model, local_idx_parts, idx_m, n, idx_parts_to_ranks, ) @staticmethod - def _build_comms(index_futures, query_futures, streams_per_handle, + def _build_comms(index_handler, query_handler, streams_per_handle, verbose): # Communicator clique needs to include the union of workers hosting # query and index partitions - workers = set(map(lambda x: x[0], index_futures)) - workers.update(list(map(lambda x: x[0], query_futures))) + workers = set(index_handler.workers) + workers.update(query_handler.workers) comms = CommsContext(comms_p2p=True, streams_per_handle=streams_per_handle, @@ -117,9 +118,9 @@ def get_neighbors(self, n_neighbors): 
Default n_neighbors if parameter n_neighbors is none """ if n_neighbors is None: - if "n_neighbors" in self.model_args \ - and self.model_args["n_neighbors"] is not None: - n_neighbors = self.model_args["n_neighbors"] + if "n_neighbors" in self.kwargs \ + and self.kwargs["n_neighbors"] is not None: + n_neighbors = self.kwargs["n_neighbors"] else: try: from cuml.neighbors.nearest_neighbors_mg import \ @@ -139,7 +140,7 @@ def _create_models(self, comms): nn_models = dict([(worker, self.client.submit( NearestNeighbors._func_create_model, comms.sessionId, - **self.model_args, + **self.kwargs, workers=[worker], key="%s-%s" % (key, idx))) for idx, worker in enumerate(comms.worker_addresses)]) @@ -148,23 +149,23 @@ def _create_models(self, comms): def _query_models(self, n_neighbors, comms, nn_models, - index_futures, query_futures): + index_handler, query_handler): worker_info = comms.worker_info(comms.worker_addresses) - index_worker_to_parts = workers_to_parts(index_futures) - query_worker_to_parts = workers_to_parts(query_futures) - """ Build inputs and outputs """ + index_handler.calculate_parts_to_sizes(comms=comms) + query_handler.calculate_parts_to_sizes(comms=comms) + idx_parts_to_ranks, idx_M = parts_to_ranks(self.client, - worker_info, - index_futures) + worker_info, + index_handler.gpu_futures) query_parts_to_ranks, query_M = parts_to_ranks(self.client, worker_info, - query_futures) + query_handler.gpu_futures) """ Invoke kneighbors on Dask workers to perform distributed query @@ -174,14 +175,14 @@ def _query_models(self, n_neighbors, nn_fit = dict([(worker_info[worker]["r"], self.client.submit( NearestNeighbors._func_kneighbors, nn_models[worker], - index_worker_to_parts[worker] if - worker in index_worker_to_parts else [], - idx_M, + index_handler.worker_to_parts[worker] if + worker in index_handler.workers else [], + index_handler.total_rows, self.n_cols, idx_parts_to_ranks, - query_worker_to_parts[worker] if - worker in query_worker_to_parts else [], - 
query_M, + query_handler.worker_to_parts[worker] if + worker in query_handler.workers else [], + query_handler.total_rows, query_parts_to_ranks, worker_info[worker]["r"], n_neighbors, @@ -231,8 +232,8 @@ def kneighbors(self, X=None, n_neighbors=None, return_distance=True, """ n_neighbors = self.get_neighbors(n_neighbors) - query_futures = self.X if X is None else \ - self.client.sync(extract_ddf_partitions, X) + query_handler = self.X_handler if X is None else \ + DistributedDataHandler.create(data=X, client=self.client) if X is None: raise ValueError("Model needs to be trained using fit() " @@ -241,7 +242,7 @@ def kneighbors(self, X=None, n_neighbors=None, return_distance=True, """ Create communicator clique """ - comms = NearestNeighbors._build_comms(self.X, query_futures, + comms = NearestNeighbors._build_comms(self.X_handler, query_handler, self.streams_per_handle, self.verbose) @@ -255,7 +256,7 @@ def kneighbors(self, X=None, n_neighbors=None, return_distance=True, """ nn_fit, out_d_futures, out_i_futures = \ self._query_models(n_neighbors, comms, nn_models, - self.X, query_futures) + self.X_handler, query_handler) comms.destroy() From a039a0aadc2ae55331fdb3b0402121c6ed69daf6 Mon Sep 17 00:00:00 2001 From: Vinay D Date: Mon, 6 Apr 2020 14:36:24 +0530 Subject: [PATCH 084/330] Passing tree_params directly to plant() instead of passing individual tree paramters --- cpp/src/decisiontree/decisiontree_impl.cuh | 34 ++++++++++------------ cpp/src/decisiontree/decisiontree_impl.h | 5 +--- 2 files changed, 16 insertions(+), 23 deletions(-) diff --git a/cpp/src/decisiontree/decisiontree_impl.cuh b/cpp/src/decisiontree/decisiontree_impl.cuh index a070f9c074..4cf31347c8 100644 --- a/cpp/src/decisiontree/decisiontree_impl.cuh +++ b/cpp/src/decisiontree/decisiontree_impl.cuh @@ -214,25 +214,23 @@ template void DecisionTreeBase::plant( std::vector> &sparsetree, const T *data, const int ncols, const int nrows, const L *labels, unsigned int *rowids, - const int n_sampled_rows, 
int unique_labels, const int treeid, int maxdepth, - int max_leaf_nodes, const float colper, int n_bins, int split_algo_flag, - int cfg_min_rows_per_node, bool cfg_bootstrap_features, - CRITERION cfg_split_criterion, bool quantile_per_tree) { - split_algo = split_algo_flag; + const int n_sampled_rows, int unique_labels, const int treeid, + DecisionTreeParams &tree_params) { + split_algo = tree_params.split_algo; dinfo.NLocalrows = nrows; dinfo.NGlobalrows = nrows; dinfo.Ncols = ncols; - nbins = n_bins; - treedepth = maxdepth; - maxleaves = max_leaf_nodes; + nbins = tree_params.n_bins; + treedepth = tree_params.max_depth; + maxleaves = tree_params.max_leaves; n_unique_labels = unique_labels; - min_rows_per_node = cfg_min_rows_per_node; - bootstrap_features = cfg_bootstrap_features; - split_criterion = cfg_split_criterion; + min_rows_per_node = tree_params.min_rows_per_node; + bootstrap_features = tree_params.bootstrap_features; + split_criterion = tree_params.split_criterion; - if (split_algo == SPLIT_ALGO::GLOBAL_QUANTILE && quantile_per_tree) { + if (split_algo == SPLIT_ALGO::GLOBAL_QUANTILE && tree_params.quantile_per_tree) { preprocess_quantile(data, rowids, n_sampled_rows, ncols, dinfo.NLocalrows, - n_bins, tempmem); + tree_params.n_bins, tempmem); } CUDA_CHECK(cudaStreamSynchronize( tempmem->stream)); // added to ensure accurate measurement @@ -251,8 +249,9 @@ void DecisionTreeBase::plant( total_temp_mem = tempmem->totalmem; MLCommon::TimerCPU timer; - grow_deep_tree(data, labels, rowids, n_sampled_rows, ncols, colper, - dinfo.NLocalrows, sparsetree, treeid, tempmem); + grow_deep_tree(data, labels, rowids, n_sampled_rows, ncols, + tree_params.max_features, dinfo.NLocalrows, sparsetree, treeid, + tempmem); train_time = timer.getElapsedSeconds(); } template @@ -367,10 +366,7 @@ void DecisionTreeBase::base_fit( } plant(sparsetree, data, ncols, nrows, labels, rowids, n_sampled_rows, - unique_labels, treeid, tree_params.max_depth, tree_params.max_leaves, - 
tree_params.max_features, tree_params.n_bins, tree_params.split_algo, - tree_params.min_rows_per_node, tree_params.bootstrap_features, - tree_params.split_criterion, tree_params.quantile_per_tree); + unique_labels, treeid, tree_params); if (in_tempmem == nullptr) { tempmem.reset(); } diff --git a/cpp/src/decisiontree/decisiontree_impl.h b/cpp/src/decisiontree/decisiontree_impl.h index fe1e206990..4a10a672eb 100644 --- a/cpp/src/decisiontree/decisiontree_impl.h +++ b/cpp/src/decisiontree/decisiontree_impl.h @@ -88,10 +88,7 @@ class DecisionTreeBase { void plant(std::vector> &sparsetree, const T *data, const int ncols, const int nrows, const L *labels, unsigned int *rowids, const int n_sampled_rows, int unique_labels, - const int treeid, int maxdepth, int max_leaf_nodes, - const float colper, int n_bins, int split_algo_flag, - int cfg_min_rows_per_node, bool cfg_bootstrap_features, - CRITERION cfg_split_criterion, bool cfg_quantile_per_tree); + const int treeid, DecisionTreeParams &tree_params); virtual void grow_deep_tree( const T *data, const L *labels, unsigned int *rowids, From 846ae5186e003995e2c626dfe7fc7c047a370865 Mon Sep 17 00:00:00 2001 From: Vinay D Date: Mon, 6 Apr 2020 14:51:17 +0530 Subject: [PATCH 085/330] Updated CHANGELOG.md for PR 2004 --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index d70d2624eb..46f70fa5ba 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ - PR #1971: python: Correctly honor --singlegpu option and CUML_BUILD_PATH env variable - PR #1969: Update libcumlprims to 0.14 - PR #1973: Add missing mg files for setup.py --singlegpu flag +- PR #2004: Refactoring the arguments to `plant()` call # cuML 0.13.0 (Date TBD) From bc3ba550cca0c6cfd476747d553d557539a79261 Mon Sep 17 00:00:00 2001 From: Vinay D Date: Mon, 6 Apr 2020 15:17:12 +0530 Subject: [PATCH 086/330] Fixing style issues --- cpp/src/decisiontree/decisiontree_impl.cuh | 9 +++++---- 1 file changed, 5 insertions(+), 4 
deletions(-) diff --git a/cpp/src/decisiontree/decisiontree_impl.cuh b/cpp/src/decisiontree/decisiontree_impl.cuh index 4cf31347c8..52d0bf7d5b 100644 --- a/cpp/src/decisiontree/decisiontree_impl.cuh +++ b/cpp/src/decisiontree/decisiontree_impl.cuh @@ -216,19 +216,20 @@ void DecisionTreeBase::plant( const int nrows, const L *labels, unsigned int *rowids, const int n_sampled_rows, int unique_labels, const int treeid, DecisionTreeParams &tree_params) { - split_algo = tree_params.split_algo; + split_algo = tree_params.split_algo; dinfo.NLocalrows = nrows; dinfo.NGlobalrows = nrows; dinfo.Ncols = ncols; nbins = tree_params.n_bins; treedepth = tree_params.max_depth; - maxleaves = tree_params.max_leaves; + maxleaves = tree_params.max_leaves; n_unique_labels = unique_labels; - min_rows_per_node = tree_params.min_rows_per_node; + min_rows_per_node = tree_params.min_rows_per_node; bootstrap_features = tree_params.bootstrap_features; split_criterion = tree_params.split_criterion; - if (split_algo == SPLIT_ALGO::GLOBAL_QUANTILE && tree_params.quantile_per_tree) { + if (split_algo == SPLIT_ALGO::GLOBAL_QUANTILE && + tree_params.quantile_per_tree) { preprocess_quantile(data, rowids, n_sampled_rows, ncols, dinfo.NLocalrows, tree_params.n_bins, tempmem); } From bb10aa164a86be5a7e9987a3510321dd7261f6a3 Mon Sep 17 00:00:00 2001 From: Simon Andersen Date: Mon, 6 Apr 2020 14:45:12 -0500 Subject: [PATCH 087/330] Better error report on test fails --- python/cuml/test/test_one_hot_encoder.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/python/cuml/test/test_one_hot_encoder.py b/python/cuml/test/test_one_hot_encoder.py index 5a6c710879..77ea00ecd9 100644 --- a/python/cuml/test/test_one_hot_encoder.py +++ b/python/cuml/test/test_one_hot_encoder.py @@ -22,6 +22,7 @@ from sklearn.preprocessing import OneHotEncoder as SkOneHotEncoder from cuml.test.utils import stress_param +from pandas.util.testing import assert_frame_equal def _from_df_to_array(df): @@ 
-65,7 +66,7 @@ def test_onehot_inverse_transform(drop): ohe = enc.fit_transform(X) inv = enc.inverse_transform(ohe) - assert X.equals(inv) + assert_frame_equal(inv.to_pandas(), X.to_pandas()) def test_onehot_categories(): @@ -118,7 +119,7 @@ def test_onehot_inverse_transform_handle_unknown(): enc = enc.fit(X) df = enc.inverse_transform(Y_ohe) ref = DataFrame({'chars': [None, 'b'], 'int': [0, 2]}) - assert df.equals(ref) + assert_frame_equal(df.to_pandas(), ref.to_pandas()) @pytest.mark.parametrize('drop', [None, 'first']) @@ -141,7 +142,7 @@ def test_onehot_random_inputs(drop, sparse, n_samples): inv_ohe = enc.inverse_transform(ohe) - assert inv_ohe.equals(df) + assert_frame_equal(inv_ohe.to_pandas(), df.to_pandas()) def test_onehot_drop_idx_first(): @@ -154,7 +155,8 @@ def test_onehot_drop_idx_first(): ohe = enc.fit_transform(X) ref = sk_enc.fit_transform(X_ary) cp.testing.assert_array_equal(ohe, ref) - assert X.equals(enc.inverse_transform(ohe)) + inv = enc.inverse_transform(ohe) + assert_frame_equal(inv.to_pandas(), X.to_pandas()) def test_onehot_drop_one_of_each(): @@ -164,7 +166,8 @@ def test_onehot_drop_one_of_each(): ohe = enc.fit_transform(X) ref = SkOneHotEncoder(sparse=False, drop=['b', 2, 'b']).fit_transform(X) cp.testing.assert_array_equal(ohe, ref) - assert X.equals(enc.inverse_transform(ohe)) + inv = enc.inverse_transform(ohe) + assert_frame_equal(inv.to_pandas(), X.to_pandas()) @pytest.mark.parametrize("drop, pattern", From 81848d20a1097f9585abd10455702f57673692f0 Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Mon, 6 Apr 2020 15:39:59 -0500 Subject: [PATCH 088/330] FIX Add python/record.txt to gitignore --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index de6e534dc8..1e7a42a769 100644 --- a/.gitignore +++ b/.gitignore @@ -19,6 +19,7 @@ cuml.egg-info/ dist/ python/cuml/**/*.cpp python/external_repositories +python/record.txt log .ipynb_checkpoints .DS_Store @@ -40,4 +41,4 @@ 
dask-worker-space/ *.qdrep *.qdrep.cache *.qdstrm -*.nvprof \ No newline at end of file +*.nvprof From e40db175ea7ddba0cd6884fd1f0549d329c31288 Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Mon, 6 Apr 2020 15:40:11 -0500 Subject: [PATCH 089/330] FEA Add proper cython deep cleaning to setup.py --- python/setup.py | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/python/setup.py b/python/setup.py index 0196e12a7a..fdaa3b52d0 100644 --- a/python/setup.py +++ b/python/setup.py @@ -88,11 +88,34 @@ cutlass_path = 'external_repositories/cutlass' else: - subprocess.check_call(['rm', '-rf', 'external_repositories']) + treelite_path = "" faiss_path = "" cub_path = "" cutlass_path = "" + libcuml_path = "" + + import shutil + import glob + import os + import sys + + try: + shutil.rmtree('external_repositories', ignore_errors=True) + shutil.rmtree('cuml.egg-info', ignore_errors=True) + shutil.rmtree('__pycache__', ignore_errors=True) + + sg_folders = glob.glob('cuml/*') + for folder in sg_folders: + cython_exts = glob.glob(folder + '/*.cpp') + cython_exts.extend(glob.glob(folder + '/*.cpython*')) + for file in cython_exts: + os.remove(file) + + sys.exit(0) + + except IOError: + pass ############################################################################## # - Cython extensions build and parameters ----------------------------------- From dda6afb74fa42a48d5c52225a46414a606013665 Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Mon, 6 Apr 2020 15:40:19 -0500 Subject: [PATCH 090/330] FEA Make build.sh call clean target of setup.py --- build.sh | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/build.sh b/build.sh index c2c1a1c9d1..5c772e1f58 100755 --- a/build.sh +++ b/build.sh @@ -114,10 +114,13 @@ if (( ${CLEAN} == 1 )); then # The find removes all contents but leaves the dirs, the rmdir # attempts to remove the dirs but can fail safely. 
for bd in ${BUILD_DIRS}; do - if [ -d ${bd} ]; then - find ${bd} -mindepth 1 -delete - rmdir ${bd} || true - fi + if [ -d ${bd} ]; then + find ${bd} -mindepth 1 -delete + rmdir ${bd} || true + fi + cd ${REPODIR}/python + python setup.py clean --all + cd ${REPODIR} done fi From 53a899a53a5244e0dde634d396fb1daee16d4671 Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Mon, 6 Apr 2020 15:54:40 -0500 Subject: [PATCH 091/330] FIX Import order fix --- python/setup.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/python/setup.py b/python/setup.py index fdaa3b52d0..d5d42bd613 100644 --- a/python/setup.py +++ b/python/setup.py @@ -16,7 +16,8 @@ from Cython.Build import cythonize from distutils.sysconfig import get_python_lib -from setuptools import setup, find_packages +from setuptools import find_packages +from setuptools import setup from setuptools.extension import Extension from setuputils import get_submodule_dependencies @@ -25,13 +26,15 @@ except ImportError: from setuptools.command.build_ext import build_ext +import glob +import numpy import os -import subprocess +import shutil import sys import sysconfig import versioneer import warnings -import numpy + install_requires = [ 'numba', @@ -95,11 +98,6 @@ cutlass_path = "" libcuml_path = "" - import shutil - import glob - import os - import sys - try: shutil.rmtree('external_repositories', ignore_errors=True) shutil.rmtree('cuml.egg-info', ignore_errors=True) From d7f7914a69587f9420a375a125c466483f605741 Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Mon, 6 Apr 2020 15:57:11 -0500 Subject: [PATCH 092/330] DOC Added entry to changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7e8e329d73..e0c28de592 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ - PR #1972: updates to our flow to use conda-forge's clang and clang-tools packages - PR #1974: Reduce ARIMA testing time - PR #1984: Enable Ninja build 
+- PR #2016: Add capability to setup.py and build.sh to fully clean all cython build files and artifacts ## Bug Fixes - PR #1939: Fix syntax error in cuml.common.array From 2a1c26779fcd12ff2239e93f897c54072acd0870 Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Mon, 6 Apr 2020 16:39:46 -0500 Subject: [PATCH 093/330] FIX Add --deep flag to avoid conflict with conda build of libcuml script --- build.sh | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/build.sh b/build.sh index 5c772e1f58..ed36ecb2e1 100755 --- a/build.sh +++ b/build.sh @@ -18,7 +18,7 @@ ARGS=$* # script, and that this script resides in the repo dir! REPODIR=$(cd $(dirname $0); pwd) -VALIDARGS="clean libcuml cuml prims bench prims-bench -v -g -n --allgpuarch --singlegpu --nvtx --show_depr_warn -h --help" +VALIDARGS="clean libcuml cuml prims bench prims-bench -v -g -n --allgpuarch --singlegpu --nvtx --show_depr_warn -h --help --deep" HELP="$0 [ ...] [ ...] where is: clean - remove all existing build artifacts and configuration (start over) @@ -33,6 +33,7 @@ HELP="$0 [ ...] [ ...] 
-g - build for debug -n - no install step --allgpuarch - build for all supported GPU architectures + --deep - Use to make clean target remove all cythonized cpp files and artifacts --singlegpu - Build cuml without multigpu support (multigpu requires libcumlprims) --nvtx - Enable nvtx for profiling support --show_depr_warn - show cmake deprecation warnings @@ -53,6 +54,7 @@ BUILD_ALL_GPU_ARCH=0 SINGLEGPU="" NVTX=OFF CLEAN=0 +DEEPCLEAN=0 BUILD_DISABLE_DEPRECATION_WARNING=ON # Set defaults for vars that may not have been defined externally @@ -106,6 +108,9 @@ fi if hasArg clean; then CLEAN=1 fi +if hasArg --deep; then + DEEPCLEAN=1 +fi # If clean given, run it prior to any other steps if (( ${CLEAN} == 1 )); then @@ -118,9 +123,11 @@ if (( ${CLEAN} == 1 )); then find ${bd} -mindepth 1 -delete rmdir ${bd} || true fi - cd ${REPODIR}/python - python setup.py clean --all - cd ${REPODIR} + if (( ${DEEPCLEAN} == 1 )); then + cd ${REPODIR}/python + python setup.py clean --all + cd ${REPODIR} + fi done fi From c45377ec9eb59973a6c30d9775991f254d646942 Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Tue, 7 Apr 2020 14:03:11 -0500 Subject: [PATCH 094/330] FIX Move import to try clause in case cuml is built without cumlprims --- python/cuml/test/dask/test_nearest_neighbors.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/python/cuml/test/dask/test_nearest_neighbors.py b/python/cuml/test/dask/test_nearest_neighbors.py index c906364c67..e0eda813aa 100644 --- a/python/cuml/test/dask/test_nearest_neighbors.py +++ b/python/cuml/test/dask/test_nearest_neighbors.py @@ -30,9 +30,6 @@ from sklearn.neighbors import KNeighborsClassifier -from cuml.neighbors.nearest_neighbors_mg import \ - NearestNeighborsMG as cumlNN - from cuml.test.utils import array_equal @@ -200,6 +197,8 @@ def test_default_n_neighbors(cluster): try: from cuml.dask.neighbors import NearestNeighbors as daskNN + from cuml.neighbors.nearest_neighbors_mg import \ + NearestNeighborsMG as 
cumlNN from sklearn.datasets import make_blobs From 0997c9f9ad31c0283ef83df0c4163e688c78f629 Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Tue, 7 Apr 2020 14:03:34 -0500 Subject: [PATCH 095/330] DOC Improved singlegpu docstring --- build.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/build.sh b/build.sh index ed36ecb2e1..59d88eac2e 100755 --- a/build.sh +++ b/build.sh @@ -1,6 +1,6 @@ #!/bin/bash -# Copyright (c) 2019, NVIDIA CORPORATION. +# Copyright (c) 2019-2020, NVIDIA CORPORATION. # cuml build script @@ -34,7 +34,7 @@ HELP="$0 [ ...] [ ...] -n - no install step --allgpuarch - build for all supported GPU architectures --deep - Use to make clean target remove all cythonized cpp files and artifacts - --singlegpu - Build cuml without multigpu support (multigpu requires libcumlprims) + --singlegpu - Build cuml without libcumlprims based multigpu algorithms. --nvtx - Enable nvtx for profiling support --show_depr_warn - show cmake deprecation warnings -h - print this text From fad6f5995d2a911b9cca2203418971fb2dbf34e9 Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Tue, 7 Apr 2020 14:06:28 -0500 Subject: [PATCH 096/330] FIX Various fixes and refactored clean folder function to setuputils --- python/setup.py | 102 +++++++++++++++++--------------------- python/setuputils.py | 114 ++++++++++++++++++++++++++----------------- 2 files changed, 115 insertions(+), 101 deletions(-) diff --git a/python/setup.py b/python/setup.py index d5d42bd613..0488eb6e49 100644 --- a/python/setup.py +++ b/python/setup.py @@ -19,6 +19,7 @@ from setuptools import find_packages from setuptools import setup from setuptools.extension import Extension +from setuputils import clean_folder from setuputils import get_submodule_dependencies try: @@ -26,7 +27,6 @@ except ImportError: from setuptools.command.build_ext import build_ext -import glob import numpy import os import shutil @@ -41,7 +41,6 @@ 'cython' ] - 
############################################################################## # - Dependencies include and lib folder setup -------------------------------- @@ -53,82 +52,74 @@ cuda_include_dir = os.path.join(CUDA_HOME, "include") cuda_lib_dir = os.path.join(CUDA_HOME, "lib64") - ############################################################################## -# - Subrepo checking and cloning --------------------------------------------- - -subrepos = [ - 'cub', - 'cutlass', - 'faiss', - 'treelite' -] +# - Clean target ------------------------------------------------------------- -# We check if there is a libcuml++ build folder, by default in cpp/build -# or in CUML_BUILD_PATH env variable. Otherwise setup.py will clone the -# dependencies defined in cpp/CMakeListst.txt -if "clean" not in sys.argv: - if os.environ.get('CUML_BUILD_PATH', False): - libcuml_path = '../' + os.environ.get('CUML_BUILD_PATH') - else: - libcuml_path = '../cpp/build/' - - found_cmake_repos = get_submodule_dependencies(subrepos, - libcuml_path=libcuml_path) - - if found_cmake_repos: - treelite_path = os.path.join(libcuml_path, - 'treelite/src/treelite/include') - faiss_path = os.path.join(libcuml_path, 'faiss/src/') - cub_path = os.path.join(libcuml_path, 'cub/src/cub') - cutlass_path = os.path.join(libcuml_path, 'cutlass/src/cutlass') - else: - # faiss requires the include to be to the parent of the root of - # their repo instead of the full path like the others - faiss_path = 'external_repositories/' - treelite_path = 'external_repositories/treelite/include' - cub_path = 'external_repositories/cub' - cutlass_path = 'external_repositories/cutlass' - -else: +if "clean" in sys.argv: + print("Cleaning all Python and Cython build artifacts...") treelite_path = "" - faiss_path = "" - cub_path = "" - cutlass_path = "" libcuml_path = "" try: + shutil.rmtree('build') + shutil.rmtree('.pytest_cache', ignore_errors=True) shutil.rmtree('external_repositories', ignore_errors=True) 
shutil.rmtree('cuml.egg-info', ignore_errors=True) shutil.rmtree('__pycache__', ignore_errors=True) - sg_folders = glob.glob('cuml/*') - for folder in sg_folders: - cython_exts = glob.glob(folder + '/*.cpp') - cython_exts.extend(glob.glob(folder + '/*.cpython*')) - for file in cython_exts: - os.remove(file) - - sys.exit(0) + clean_folder('cuml') except IOError: pass + # need to terminate script so cythonizing doesn't get triggered after + # cleanup unintendedly + sys.argv.remove("clean") + sys.argv.remove("--all") + + if len(sys.argv) == 1: + sys.exit(0) + +############################################################################## +# - Cloning dependencies if needed ------------------------------------------- + +subrepos = [ + 'treelite' +] + +# We check if there is a libcuml++ build folder, by default in cpp/build +# or in CUML_BUILD_PATH env variable. Otherwise setup.py will clone the +# dependencies defined in cpp/cmake/Dependencies.cmake +if os.environ.get('CUML_BUILD_PATH', False): + libcuml_path = '../' + os.environ.get('CUML_BUILD_PATH') +else: + libcuml_path = '../cpp/build/' + +found_cmake_repos = get_submodule_dependencies(subrepos, + libcuml_path=libcuml_path) + +if found_cmake_repos: + treelite_path = os.path.join(libcuml_path, + 'treelite/src/treelite/include') +else: + treelite_path = 'external_repositories/treelite/include' + + ############################################################################## # - Cython extensions build and parameters ----------------------------------- +# cumlcomms and nccl are still needed for multigpu algos not based +# on libcumlprims libs = ['cuda', 'cuml++', + 'cumlcomms', + 'nccl', 'rmm'] include_dirs = ['../cpp/src', '../cpp/include', - '../cpp/external', '../cpp/src_prims', - cutlass_path, - cub_path, - faiss_path, treelite_path, '../cpp/comms/std/src', '../cpp/comms/std/include', @@ -153,13 +144,10 @@ exc_list.append('cuml/linear_model/ridge_mg.pyx') 
exc_list.append('cuml/linear_model/linear_regression_mg.pyx') exc_list.append('cuml/neighbors/nearest_neighbors_mg.pyx') + sys.argv.remove('--singlegpu') else: libs.append('cumlprims') - # ucx/ucx-py related functionality available in version 0.12+ - # libs.append("ucp") - libs.append('cumlcomms') - libs.append('nccl') sys_include = os.path.dirname(sysconfig.get_path("include")) include_dirs.append("%s/cumlprims" % sys_include) diff --git a/python/setuputils.py b/python/setuputils.py index 171efe579e..68d1b6e6e3 100644 --- a/python/setuputils.py +++ b/python/setuputils.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2018, NVIDIA CORPORATION. +# Copyright (c) 2018-2020, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,12 +14,38 @@ # limitations under the License. # +import glob import os import re +import shutil import subprocess import warnings +def clean_folder(path): + """ + Function to clean all Cython and Python artifacts and cache folders. It + clean the folder as well as its direct children (NOT recursively). + + Parameters + ---------- + path : String + Path to the folder to be cleaned. + """ + shutil.rmtree(path + '/__pycache__', ignore_errors=True) + + folders = glob.glob(path + '/*') + for folder in folders: + shutil.rmtree(folder + '/__pycache__', ignore_errors=True) + + clean_folder(folder) + + cython_exts = glob.glob(folder + '/*.cpp') + cython_exts.extend(glob.glob(folder + '/*.cpython*')) + for file in cython_exts: + os.remove(file) + + def clone_repo(name, GIT_REPOSITORY, GIT_TAG, force_clone=False): """ Function to clone repos if they have not been cloned already. @@ -27,17 +53,17 @@ def clone_repo(name, GIT_REPOSITORY, GIT_TAG, force_clone=False): in spite of not being very pythonic. 
Parameters - ---------- - name : String - Name of the repo to be cloned - GIT_REPOSITORY : String - URL of the repo to be cloned - GIT_TAG : String - commit hash or git hash to be cloned. Accepts anything that - `git checkout` accepts - force_clone : Boolean - Set to True to ignore already cloned repositories in - external_repositories and clone + ---------- + name : String + Name of the repo to be cloned + GIT_REPOSITORY : String + URL of the repo to be cloned + GIT_TAG : String + commit hash or git hash to be cloned. Accepts anything that + `git checkout` accepts + force_clone : Boolean + Set to True to ignore already cloned repositories in + external_repositories and clone """ @@ -66,23 +92,23 @@ def get_repo_cmake_info(names, file_path): Function to find information about submodules from cpp/CMakeLists file Parameters - ---------- - name : List of Strings - List containing the names of the repos to be cloned. Must match - the names of the cmake git clone instruction - `ExternalProject_Add(name` - file_path : String - Relative path of the location of the CMakeLists.txt (or the cmake - module which contains ExternalProject_Add definitions) to extract - the information. + ---------- + name : List of Strings + List containing the names of the repos to be cloned. Must match + the names of the cmake git clone instruction + `ExternalProject_Add(name` + file_path : String + Relative path of the location of the CMakeLists.txt (or the cmake + module which contains ExternalProject_Add definitions) to extract + the information. Returns - ------- - results : dictionary - Dictionary where results[name] contains an array, - where results[name][0] is the url of the repo and - repo_info[repo][1] is the tag/commit hash to be cloned as - specified by cmake. + ------- + results : dictionary + Dictionary where results[name] contains an array, + where results[name][0] is the url of the repo and + repo_info[repo][1] is the tag/commit hash to be cloned as + specified by cmake. 
""" with open(file_path) as f: @@ -113,25 +139,25 @@ def get_submodule_dependencies(repos, repos needed to build the cuML Python package. Parameters - ---------- - repos : List of Strings - List containing the names of the repos to be cloned. Must match - the names of the cmake git clone instruction - `ExternalProject_Add(name` - file_path : String - Relative path of the location of the CMakeLists.txt (or the cmake - module which contains ExternalProject_Add definitions) to extract - the information. By default it will look in the standard location - `cuml_repo_root/cpp` - libcuml_path : String - Relative location of the build folder to look if repositories - already exist + ---------- + repos : List of Strings + List containing the names of the repos to be cloned. Must match + the names of the cmake git clone instruction + `ExternalProject_Add(name` + file_path : String + Relative path of the location of the CMakeLists.txt (or the cmake + module which contains ExternalProject_Add definitions) to extract + the information. By default it will look in the standard location + `cuml_repo_root/cpp` + libcuml_path : String + Relative location of the build folder to look if repositories + already exist Returns - ------- - result : boolean - True if repos were found in libcuml cpp build folder, False - if they were not found. + ------- + result : boolean + True if repos were found in libcuml cpp build folder, False + if they were not found. """ repo_info = get_repo_cmake_info(repos, file_path) From 1e5cb213c56cff4f95e8e9d7ee5c264da3f0d6ce Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Tue, 7 Apr 2020 14:59:27 -0500 Subject: [PATCH 097/330] FIX use old style cythonization for singlegpu target --- python/setup.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/python/setup.py b/python/setup.py index e63a2abc50..daaf12a8b0 100644 --- a/python/setup.py +++ b/python/setup.py @@ -14,6 +14,7 @@ # limitations under the License. 
# +from Cython.Build import cythonize from distutils.sysconfig import get_python_lib from setuptools import find_packages from setuptools import setup @@ -144,7 +145,6 @@ exc_list.append('cuml/linear_model/linear_regression_mg.pyx') exc_list.append('cuml/neighbors/nearest_neighbors_mg.pyx') - sys.argv.remove('--singlegpu') else: libs.append('cumlprims') @@ -168,11 +168,19 @@ ] for e in extensions: - e.exclude = exc_list + # TODO: this exclude is not working, need to research way to properly + # exclude files for parallel build + # e.exclude = exc_list e.cython_directives = dict( profile=False, language_level=3, embedsignature=True ) +if "--singlegpu" in sys.argv: + print("Full cythonization in parallel is not supported for singlegpu " + + "target for now.") + extensions = cythonize(extensions, + exclude=exc_list) + sys.argv.remove('--singlegpu') ############################################################################## # - Python package generation ------------------------------------------------ From 7cd2572ba24da2b90c3e8875615a57b71e867992 Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Tue, 7 Apr 2020 15:39:03 -0500 Subject: [PATCH 098/330] DOC Add github issue reference --- python/setup.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/setup.py b/python/setup.py index daaf12a8b0..b14b90be68 100644 --- a/python/setup.py +++ b/python/setup.py @@ -169,7 +169,8 @@ for e in extensions: # TODO: this exclude is not working, need to research way to properly - # exclude files for parallel build + # exclude files for parallel build. 
See issue + # https://github.com/rapidsai/cuml/issues/2037 # e.exclude = exc_list e.cython_directives = dict( profile=False, language_level=3, embedsignature=True From 5f9d20927565bbc4acf0837c3adc1a05a2d17271 Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Tue, 7 Apr 2020 17:38:11 -0500 Subject: [PATCH 099/330] FIX Improve path detection of folders to delete --- python/setup.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/python/setup.py b/python/setup.py index b14b90be68..64ced2a746 100644 --- a/python/setup.py +++ b/python/setup.py @@ -16,6 +16,7 @@ from Cython.Build import cythonize from distutils.sysconfig import get_python_lib +from pathlib import Path from setuptools import find_packages from setuptools import setup from setuptools.extension import Extension @@ -62,13 +63,15 @@ libcuml_path = "" try: - shutil.rmtree('build') - shutil.rmtree('.pytest_cache', ignore_errors=True) - shutil.rmtree('external_repositories', ignore_errors=True) - shutil.rmtree('cuml.egg-info', ignore_errors=True) - shutil.rmtree('__pycache__', ignore_errors=True) + setup_file_path = str(Path(__file__).parent.absolute()) + shutil.rmtree(setup_file_path + '/build') + shutil.rmtree(setup_file_path + '/.pytest_cache', ignore_errors=True) + shutil.rmtree(setup_file_path + '/external_repositories', + ignore_errors=True) + shutil.rmtree(setup_file_path + '/cuml.egg-info', ignore_errors=True) + shutil.rmtree(setup_file_path + '/__pycache__', ignore_errors=True) - clean_folder('cuml') + clean_folder(setup_file_path + '/cuml') except IOError: pass @@ -204,3 +207,4 @@ cmdclass=cmdclass, zip_safe=False ) + From 8b8444b2b0177d20d6aba09827b93e43d9751e50 Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Tue, 7 Apr 2020 17:44:17 -0500 Subject: [PATCH 100/330] FIX Docstring and serch only for folders fix --- python/setuputils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/setuputils.py b/python/setuputils.py index 
68d1b6e6e3..24ee9a9e08 100644 --- a/python/setuputils.py +++ b/python/setuputils.py @@ -25,7 +25,7 @@ def clean_folder(path): """ Function to clean all Cython and Python artifacts and cache folders. It - clean the folder as well as its direct children (NOT recursively). + clean the folder as well as its direct children recursively. Parameters ---------- @@ -34,7 +34,7 @@ def clean_folder(path): """ shutil.rmtree(path + '/__pycache__', ignore_errors=True) - folders = glob.glob(path + '/*') + folders = glob.glob(path + '/*/') for folder in folders: shutil.rmtree(folder + '/__pycache__', ignore_errors=True) From 12f00c12a69077fdbd02d57a56cbf1c7aeedb2af Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Tue, 7 Apr 2020 17:50:18 -0500 Subject: [PATCH 101/330] FIX Remove --deep option from build.sh --- build.sh | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/build.sh b/build.sh index 59d88eac2e..8b9d539c84 100755 --- a/build.sh +++ b/build.sh @@ -33,7 +33,6 @@ HELP="$0 [ ...] [ ...] -g - build for debug -n - no install step --allgpuarch - build for all supported GPU architectures - --deep - Use to make clean target remove all cythonized cpp files and artifacts --singlegpu - Build cuml without libcumlprims based multigpu algorithms. 
--nvtx - Enable nvtx for profiling support --show_depr_warn - show cmake deprecation warnings @@ -108,9 +107,6 @@ fi if hasArg clean; then CLEAN=1 fi -if hasArg --deep; then - DEEPCLEAN=1 -fi # If clean given, run it prior to any other steps if (( ${CLEAN} == 1 )); then @@ -123,11 +119,11 @@ if (( ${CLEAN} == 1 )); then find ${bd} -mindepth 1 -delete rmdir ${bd} || true fi - if (( ${DEEPCLEAN} == 1 )); then - cd ${REPODIR}/python - python setup.py clean --all - cd ${REPODIR} - fi + + cd ${REPODIR}/python + python setup.py clean --all + cd ${REPODIR} + done fi From b78f565514b4ef1f81c760c6b98e437ec0532af2 Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Tue, 7 Apr 2020 18:30:29 -0500 Subject: [PATCH 102/330] FIX Move cython import to try/catch --- python/setup.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/python/setup.py b/python/setup.py index 64ced2a746..5c476e4009 100644 --- a/python/setup.py +++ b/python/setup.py @@ -14,7 +14,6 @@ # limitations under the License. 
# -from Cython.Build import cythonize from distutils.sysconfig import get_python_lib from pathlib import Path from setuptools import find_packages @@ -23,11 +22,6 @@ from setuputils import clean_folder from setuputils import get_submodule_dependencies -try: - from Cython.Distutils.build_ext import new_build_ext as build_ext -except ImportError: - from setuptools.command.build_ext import build_ext - import numpy import os import shutil @@ -36,6 +30,13 @@ import versioneer import warnings +try: + if "--singlegpu" in sys.argv: + from Cython.Build import cythonize + else: + from Cython.Distutils.build_ext import new_build_ext as build_ext +except ImportError: + from setuptools.command.build_ext import build_ext install_requires = [ 'numba', From 444c42579cbab554b7a2423fbfe44cc52bcc7888 Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Tue, 7 Apr 2020 18:38:36 -0500 Subject: [PATCH 103/330] FIX PEP8 fixes --- python/setup.py | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/python/setup.py b/python/setup.py index b14b90be68..02143fe8c4 100644 --- a/python/setup.py +++ b/python/setup.py @@ -14,19 +14,14 @@ # limitations under the License. 
# -from Cython.Build import cythonize from distutils.sysconfig import get_python_lib +from pathlib import Path from setuptools import find_packages from setuptools import setup from setuptools.extension import Extension from setuputils import clean_folder from setuputils import get_submodule_dependencies -try: - from Cython.Distutils.build_ext import new_build_ext as build_ext -except ImportError: - from setuptools.command.build_ext import build_ext - import numpy import os import shutil @@ -35,6 +30,13 @@ import versioneer import warnings +try: + if "--singlegpu" in sys.argv: + from Cython.Build import cythonize + else: + from Cython.Distutils.build_ext import new_build_ext as build_ext +except ImportError: + from setuptools.command.build_ext import build_ext install_requires = [ 'numba', @@ -62,13 +64,15 @@ libcuml_path = "" try: - shutil.rmtree('build') - shutil.rmtree('.pytest_cache', ignore_errors=True) - shutil.rmtree('external_repositories', ignore_errors=True) - shutil.rmtree('cuml.egg-info', ignore_errors=True) - shutil.rmtree('__pycache__', ignore_errors=True) - - clean_folder('cuml') + setup_file_path = str(Path(__file__).parent.absolute()) + shutil.rmtree(setup_file_path + '/build') + shutil.rmtree(setup_file_path + '/.pytest_cache', ignore_errors=True) + shutil.rmtree(setup_file_path + '/external_repositories', + ignore_errors=True) + shutil.rmtree(setup_file_path + '/cuml.egg-info', ignore_errors=True) + shutil.rmtree(setup_file_path + '/__pycache__', ignore_errors=True) + + clean_folder(setup_file_path + '/cuml') except IOError: pass From dd83ba5c637ad244a9246495f4b6bcb6c096c93b Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Tue, 7 Apr 2020 18:39:46 -0500 Subject: [PATCH 104/330] FIX PEP8 fixes --- python/setup.py | 1 - 1 file changed, 1 deletion(-) diff --git a/python/setup.py b/python/setup.py index 5c476e4009..02143fe8c4 100644 --- a/python/setup.py +++ b/python/setup.py @@ -208,4 +208,3 @@ cmdclass=cmdclass, zip_safe=False ) - From 
7fcc0b301cdc50f13647cc4ef0ae44e9250bbe41 Mon Sep 17 00:00:00 2001 From: Vinay D Date: Wed, 8 Apr 2020 11:49:49 +0530 Subject: [PATCH 105/330] Added tree_params member to DecisionTreeBase and moved a bunch of arguments of plant under it --- cpp/src/decisiontree/decisiontree_impl.cuh | 60 ++++++++++------------ cpp/src/decisiontree/decisiontree_impl.h | 21 +++----- 2 files changed, 33 insertions(+), 48 deletions(-) mode change 100644 => 100755 cpp/src/decisiontree/decisiontree_impl.cuh mode change 100644 => 100755 cpp/src/decisiontree/decisiontree_impl.h diff --git a/cpp/src/decisiontree/decisiontree_impl.cuh b/cpp/src/decisiontree/decisiontree_impl.cuh old mode 100644 new mode 100755 index 52d0bf7d5b..05505861ac --- a/cpp/src/decisiontree/decisiontree_impl.cuh +++ b/cpp/src/decisiontree/decisiontree_impl.cuh @@ -203,32 +203,18 @@ void DecisionTreeBase::print( * @param[in] n_sampled_rows: Number of rows after subsampling * @param[in] unique_labels: Number of unique classes for calssification. Its set to 1 for regression * @param[in] treeid: Tree id in case of building multiple tree from RF. - * @param[in] n_bins: Number of split bins for every node. - * @param[in] split_algo_flag: Split algo used. MinMax / Quantile - * @param[in] cfg_min_rows_per_rows: Minimum number of rows to consider before split evaluation - * @param[in] cfg_bootstrap_features: If features need to be bootstarpped. - * @param[in] cfg_split_criterion: Split criteria to be used. GINI, ENTROPY, MSE, MAE - * @param[in] quantile_per_tree: If per tree quantile needs to be built. 
*/ template void DecisionTreeBase::plant( std::vector> &sparsetree, const T *data, const int ncols, const int nrows, const L *labels, unsigned int *rowids, - const int n_sampled_rows, int unique_labels, const int treeid, - DecisionTreeParams &tree_params) { - split_algo = tree_params.split_algo; + const int n_sampled_rows, int unique_labels, const int treeid) { dinfo.NLocalrows = nrows; dinfo.NGlobalrows = nrows; dinfo.Ncols = ncols; - nbins = tree_params.n_bins; - treedepth = tree_params.max_depth; - maxleaves = tree_params.max_leaves; n_unique_labels = unique_labels; - min_rows_per_node = tree_params.min_rows_per_node; - bootstrap_features = tree_params.bootstrap_features; - split_criterion = tree_params.split_criterion; - - if (split_algo == SPLIT_ALGO::GLOBAL_QUANTILE && + + if (tree_params.split_algo == SPLIT_ALGO::GLOBAL_QUANTILE && tree_params.quantile_per_tree) { preprocess_quantile(data, rowids, n_sampled_rows, ncols, dinfo.NLocalrows, tree_params.n_bins, tempmem); @@ -238,7 +224,7 @@ void DecisionTreeBase::plant( //Bootstrap features unsigned int *h_colids = tempmem->h_colids->data(); - if (bootstrap_features) { + if (tree_params.bootstrap_features) { srand(treeid * 1000); for (int i = 0; i < dinfo.Ncols; i++) { h_colids[i] = rand() % dinfo.Ncols; @@ -255,6 +241,7 @@ void DecisionTreeBase::plant( tempmem); train_time = timer.getElapsedSeconds(); } + template void DecisionTreeBase::predict(const ML::cumlHandle &handle, const TreeMetaDataNode *tree, @@ -328,7 +315,7 @@ void DecisionTreeBase::base_fit( const cudaStream_t stream_in, const T *data, const int ncols, const int nrows, const L *labels, unsigned int *rowids, const int n_sampled_rows, int unique_labels, std::vector> &sparsetree, - const int treeid, DecisionTreeParams &tree_params, bool is_classifier, + const int treeid, bool is_classifier, std::shared_ptr> in_tempmem) { prepare_fit_timer.reset(); const char *CRITERION_NAME[] = {"GINI", "ENTROPY", "MSE", "MAE", "END"}; @@ -367,7 +354,7 @@ void 
DecisionTreeBase::base_fit( } plant(sparsetree, data, ncols, nrows, labels, rowids, n_sampled_rows, - unique_labels, treeid, tree_params); + unique_labels, treeid); if (in_tempmem == nullptr) { tempmem.reset(); } @@ -378,13 +365,14 @@ void DecisionTreeClassifier::fit( const ML::cumlHandle &handle, const T *data, const int ncols, const int nrows, const int *labels, unsigned int *rowids, const int n_sampled_rows, const int unique_labels, TreeMetaDataNode *&tree, - DecisionTreeParams tree_params, + DecisionTreeParams tree_parameters, std::shared_ptr> in_tempmem) { + this->tree_params = tree_parameters; this->base_fit(handle.getImpl().getDeviceAllocator(), handle.getImpl().getHostAllocator(), handle.getImpl().getStream(), data, ncols, nrows, labels, rowids, n_sampled_rows, unique_labels, tree->sparsetree, - tree->treeid, tree_params, true, in_tempmem); + tree->treeid, true, in_tempmem); this->set_metadata(tree); } @@ -396,11 +384,12 @@ void DecisionTreeClassifier::fit( const cudaStream_t stream_in, const T *data, const int ncols, const int nrows, const int *labels, unsigned int *rowids, const int n_sampled_rows, const int unique_labels, TreeMetaDataNode *&tree, - DecisionTreeParams tree_params, + DecisionTreeParams tree_parameters, std::shared_ptr> in_tempmem) { + this->tree_params = tree_parameters; this->base_fit(device_allocator_in, host_allocator_in, stream_in, data, ncols, nrows, labels, rowids, n_sampled_rows, unique_labels, - tree->sparsetree, tree->treeid, tree_params, true, in_tempmem); + tree->sparsetree, tree->treeid, true, in_tempmem); this->set_metadata(tree); } @@ -408,13 +397,14 @@ template void DecisionTreeRegressor::fit( const ML::cumlHandle &handle, const T *data, const int ncols, const int nrows, const T *labels, unsigned int *rowids, const int n_sampled_rows, - TreeMetaDataNode *&tree, DecisionTreeParams tree_params, + TreeMetaDataNode *&tree, DecisionTreeParams tree_parameters, std::shared_ptr> in_tempmem) { + this->tree_params = tree_parameters; 
this->base_fit(handle.getImpl().getDeviceAllocator(), handle.getImpl().getHostAllocator(), handle.getImpl().getStream(), data, ncols, nrows, labels, rowids, n_sampled_rows, 1, tree->sparsetree, tree->treeid, - tree_params, false, in_tempmem); + false, in_tempmem); this->set_metadata(tree); } @@ -424,11 +414,12 @@ void DecisionTreeRegressor::fit( const std::shared_ptr host_allocator_in, const cudaStream_t stream_in, const T *data, const int ncols, const int nrows, const T *labels, unsigned int *rowids, const int n_sampled_rows, - TreeMetaDataNode *&tree, DecisionTreeParams tree_params, + TreeMetaDataNode *&tree, DecisionTreeParams tree_parameters, std::shared_ptr> in_tempmem) { + this->tree_params = tree_parameters; this->base_fit(device_allocator_in, host_allocator_in, stream_in, data, ncols, nrows, labels, rowids, n_sampled_rows, 1, tree->sparsetree, - tree->treeid, tree_params, false, in_tempmem); + tree->treeid, false, in_tempmem); this->set_metadata(tree); } @@ -442,9 +433,10 @@ void DecisionTreeClassifier::grow_deep_tree( int depth_cnt = 0; grow_deep_tree_classification( data, labels, rowids, ncols, colper, n_sampled_rows, nrows, - this->n_unique_labels, this->nbins, this->treedepth, this->maxleaves, - this->min_rows_per_node, this->split_criterion, this->split_algo, - this->min_impurity_decrease, depth_cnt, leaf_cnt, sparsetree, treeid, + this->n_unique_labels, this->tree_params.n_bins, this->tree_params.max_depth, + this->tree_params.max_leaves, + this->tree_params.min_rows_per_node, this->tree_params.split_criterion, this->tree_params.split_algo, + this->tree_params.min_impurity_decrease, depth_cnt, leaf_cnt, sparsetree, treeid, tempmem); this->depth_counter = depth_cnt; this->leaf_counter = leaf_cnt; @@ -459,9 +451,9 @@ void DecisionTreeRegressor::grow_deep_tree( int leaf_cnt = 0; int depth_cnt = 0; grow_deep_tree_regression( - data, labels, rowids, ncols, colper, n_sampled_rows, nrows, this->nbins, - this->treedepth, this->maxleaves, 
this->min_rows_per_node, - this->split_criterion, this->split_algo, this->min_impurity_decrease, + data, labels, rowids, ncols, colper, n_sampled_rows, nrows, this->tree_params.n_bins, + this->tree_params.max_depth, this->tree_params.max_leaves, this->tree_params.min_rows_per_node, + this->tree_params.split_criterion, this->tree_params.split_algo, this->tree_params.min_impurity_decrease, depth_cnt, leaf_cnt, sparsetree, treeid, tempmem); this->depth_counter = depth_cnt; this->leaf_counter = leaf_cnt; diff --git a/cpp/src/decisiontree/decisiontree_impl.h b/cpp/src/decisiontree/decisiontree_impl.h old mode 100644 new mode 100755 index 4a10a672eb..a1d726d710 --- a/cpp/src/decisiontree/decisiontree_impl.h +++ b/cpp/src/decisiontree/decisiontree_impl.h @@ -64,12 +64,8 @@ struct DataInfo { template class DecisionTreeBase { protected: - int split_algo; - int nbins; DataInfo dinfo; - int treedepth; int depth_counter = 0; - int maxleaves; int leaf_counter = 0; std::shared_ptr> tempmem; size_t total_temp_mem; @@ -79,16 +75,13 @@ class DecisionTreeBase { int n_unique_labels = -1; // number of unique labels in dataset double prepare_time = 0; double train_time = 0; - int min_rows_per_node; - bool bootstrap_features; - CRITERION split_criterion; MLCommon::TimerCPU prepare_fit_timer; - float min_impurity_decrease = 0.0; + DecisionTreeParams tree_params; void plant(std::vector> &sparsetree, const T *data, const int ncols, const int nrows, const L *labels, unsigned int *rowids, const int n_sampled_rows, int unique_labels, - const int treeid, DecisionTreeParams &tree_params); + const int treeid); virtual void grow_deep_tree( const T *data, const L *labels, unsigned int *rowids, @@ -103,7 +96,7 @@ class DecisionTreeBase { const int nrows, const L *labels, unsigned int *rowids, const int n_sampled_rows, int unique_labels, std::vector> &sparsetree, const int treeid, - DecisionTreeParams &tree_params, bool is_classifier, + bool is_classifier, std::shared_ptr> in_tempmem); public: @@ 
-137,7 +130,7 @@ class DecisionTreeClassifier : public DecisionTreeBase { void fit(const ML::cumlHandle &handle, const T *data, const int ncols, const int nrows, const int *labels, unsigned int *rowids, const int n_sampled_rows, const int unique_labels, - TreeMetaDataNode *&tree, DecisionTreeParams tree_params, + TreeMetaDataNode *&tree, DecisionTreeParams tree_parameters, std::shared_ptr> in_tempmem = nullptr); //This fit fucntion does not take handle , used by RF @@ -146,7 +139,7 @@ class DecisionTreeClassifier : public DecisionTreeBase { const cudaStream_t stream_in, const T *data, const int ncols, const int nrows, const int *labels, unsigned int *rowids, const int n_sampled_rows, const int unique_labels, - TreeMetaDataNode *&tree, DecisionTreeParams tree_params, + TreeMetaDataNode *&tree, DecisionTreeParams tree_parameters, std::shared_ptr> in_tempmem); private: @@ -165,7 +158,7 @@ class DecisionTreeRegressor : public DecisionTreeBase { void fit(const ML::cumlHandle &handle, const T *data, const int ncols, const int nrows, const T *labels, unsigned int *rowids, const int n_sampled_rows, TreeMetaDataNode *&tree, - DecisionTreeParams tree_params, + DecisionTreeParams tree_parameters, std::shared_ptr> in_tempmem = nullptr); //This fit function does not take handle. 
Used by RF @@ -174,7 +167,7 @@ class DecisionTreeRegressor : public DecisionTreeBase { const cudaStream_t stream_in, const T *data, const int ncols, const int nrows, const T *labels, unsigned int *rowids, const int n_sampled_rows, TreeMetaDataNode *&tree, - DecisionTreeParams tree_params, + DecisionTreeParams tree_parameters, std::shared_ptr> in_tempmem); private: From aae435b3c5548ad5908b379a75f36604b4a36972 Mon Sep 17 00:00:00 2001 From: Vinay D Date: Wed, 8 Apr 2020 12:02:47 +0530 Subject: [PATCH 106/330] Copyright correction and style related changes --- cpp/src/decisiontree/decisiontree_impl.cuh | 33 +++++++++++----------- cpp/src/decisiontree/decisiontree_impl.h | 5 ++-- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/cpp/src/decisiontree/decisiontree_impl.cuh b/cpp/src/decisiontree/decisiontree_impl.cuh index 05505861ac..3431147e3f 100755 --- a/cpp/src/decisiontree/decisiontree_impl.cuh +++ b/cpp/src/decisiontree/decisiontree_impl.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2019-2020, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -213,7 +213,7 @@ void DecisionTreeBase::plant( dinfo.NGlobalrows = nrows; dinfo.Ncols = ncols; n_unique_labels = unique_labels; - + if (tree_params.split_algo == SPLIT_ALGO::GLOBAL_QUANTILE && tree_params.quantile_per_tree) { preprocess_quantile(data, rowids, n_sampled_rows, ncols, dinfo.NLocalrows, @@ -400,11 +400,10 @@ void DecisionTreeRegressor::fit( TreeMetaDataNode *&tree, DecisionTreeParams tree_parameters, std::shared_ptr> in_tempmem) { this->tree_params = tree_parameters; - this->base_fit(handle.getImpl().getDeviceAllocator(), - handle.getImpl().getHostAllocator(), - handle.getImpl().getStream(), data, ncols, nrows, labels, - rowids, n_sampled_rows, 1, tree->sparsetree, tree->treeid, - false, in_tempmem); + this->base_fit( + handle.getImpl().getDeviceAllocator(), handle.getImpl().getHostAllocator(), + handle.getImpl().getStream(), data, ncols, nrows, labels, rowids, + n_sampled_rows, 1, tree->sparsetree, tree->treeid, false, in_tempmem); this->set_metadata(tree); } @@ -433,11 +432,11 @@ void DecisionTreeClassifier::grow_deep_tree( int depth_cnt = 0; grow_deep_tree_classification( data, labels, rowids, ncols, colper, n_sampled_rows, nrows, - this->n_unique_labels, this->tree_params.n_bins, this->tree_params.max_depth, - this->tree_params.max_leaves, - this->tree_params.min_rows_per_node, this->tree_params.split_criterion, this->tree_params.split_algo, - this->tree_params.min_impurity_decrease, depth_cnt, leaf_cnt, sparsetree, treeid, - tempmem); + this->n_unique_labels, this->tree_params.n_bins, + this->tree_params.max_depth, this->tree_params.max_leaves, + this->tree_params.min_rows_per_node, this->tree_params.split_criterion, + this->tree_params.split_algo, this->tree_params.min_impurity_decrease, + depth_cnt, leaf_cnt, sparsetree, treeid, tempmem); this->depth_counter = depth_cnt; this->leaf_counter = leaf_cnt; } @@ -451,10 +450,12 @@ void DecisionTreeRegressor::grow_deep_tree( int leaf_cnt = 0; int depth_cnt = 0; grow_deep_tree_regression( - data, 
labels, rowids, ncols, colper, n_sampled_rows, nrows, this->tree_params.n_bins, - this->tree_params.max_depth, this->tree_params.max_leaves, this->tree_params.min_rows_per_node, - this->tree_params.split_criterion, this->tree_params.split_algo, this->tree_params.min_impurity_decrease, - depth_cnt, leaf_cnt, sparsetree, treeid, tempmem); + data, labels, rowids, ncols, colper, n_sampled_rows, nrows, + this->tree_params.n_bins, this->tree_params.max_depth, + this->tree_params.max_leaves, this->tree_params.min_rows_per_node, + this->tree_params.split_criterion, this->tree_params.split_algo, + this->tree_params.min_impurity_decrease, depth_cnt, leaf_cnt, sparsetree, + treeid, tempmem); this->depth_counter = depth_cnt; this->leaf_counter = leaf_cnt; } diff --git a/cpp/src/decisiontree/decisiontree_impl.h b/cpp/src/decisiontree/decisiontree_impl.h index a1d726d710..576a1951c5 100755 --- a/cpp/src/decisiontree/decisiontree_impl.h +++ b/cpp/src/decisiontree/decisiontree_impl.h @@ -1,6 +1,6 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2019-2020, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -96,8 +96,7 @@ class DecisionTreeBase { const int nrows, const L *labels, unsigned int *rowids, const int n_sampled_rows, int unique_labels, std::vector> &sparsetree, const int treeid, - bool is_classifier, - std::shared_ptr> in_tempmem); + bool is_classifier, std::shared_ptr> in_tempmem); public: // Printing utility for high level tree info. 
From d38b4f9705267ff0693358ce2484417ab9279feb Mon Sep 17 00:00:00 2001 From: Vinay D Date: Wed, 8 Apr 2020 20:52:28 +0530 Subject: [PATCH 107/330] Refactoring tree parameters related arguments of few more functions and style related corrections --- cpp/src/decisiontree/decisiontree_impl.cuh | 25 ++---- .../levelalgo/levelfunc_classifier.cuh | 77 ++++++++++--------- .../levelalgo/levelfunc_regressor.cuh | 74 +++++++++--------- cpp/src/decisiontree/memory.cuh | 43 ++++------- cpp/src/decisiontree/memory.h | 23 +++--- cpp/src/randomforest/randomforest_impl.cuh | 16 +--- 6 files changed, 118 insertions(+), 140 deletions(-) mode change 100755 => 100644 cpp/src/decisiontree/decisiontree_impl.cuh diff --git a/cpp/src/decisiontree/decisiontree_impl.cuh b/cpp/src/decisiontree/decisiontree_impl.cuh old mode 100755 new mode 100644 index 3431147e3f..81422f9fcd --- a/cpp/src/decisiontree/decisiontree_impl.cuh +++ b/cpp/src/decisiontree/decisiontree_impl.cuh @@ -347,9 +347,7 @@ void DecisionTreeBase::base_fit( } else { tempmem = std::make_shared>( device_allocator_in, host_allocator_in, stream_in, nrows, ncols, - tree_params.max_features, unique_labels, tree_params.n_bins, - tree_params.split_algo, tree_params.max_depth, - tree_params.shuffle_features); + unique_labels, tree_params); tree_params.quantile_per_tree = true; } @@ -430,13 +428,10 @@ void DecisionTreeClassifier::grow_deep_tree( const int treeid, std::shared_ptr> tempmem) { int leaf_cnt = 0; int depth_cnt = 0; - grow_deep_tree_classification( - data, labels, rowids, ncols, colper, n_sampled_rows, nrows, - this->n_unique_labels, this->tree_params.n_bins, - this->tree_params.max_depth, this->tree_params.max_leaves, - this->tree_params.min_rows_per_node, this->tree_params.split_criterion, - this->tree_params.split_algo, this->tree_params.min_impurity_decrease, - depth_cnt, leaf_cnt, sparsetree, treeid, tempmem); + grow_deep_tree_classification(data, labels, rowids, ncols, colper, + n_sampled_rows, nrows, 
this->n_unique_labels, + this->tree_params, depth_cnt, leaf_cnt, + sparsetree, treeid, tempmem); this->depth_counter = depth_cnt; this->leaf_counter = leaf_cnt; } @@ -449,13 +444,9 @@ void DecisionTreeRegressor::grow_deep_tree( const int treeid, std::shared_ptr> tempmem) { int leaf_cnt = 0; int depth_cnt = 0; - grow_deep_tree_regression( - data, labels, rowids, ncols, colper, n_sampled_rows, nrows, - this->tree_params.n_bins, this->tree_params.max_depth, - this->tree_params.max_leaves, this->tree_params.min_rows_per_node, - this->tree_params.split_criterion, this->tree_params.split_algo, - this->tree_params.min_impurity_decrease, depth_cnt, leaf_cnt, sparsetree, - treeid, tempmem); + grow_deep_tree_regression(data, labels, rowids, ncols, colper, n_sampled_rows, + nrows, this->tree_params, depth_cnt, leaf_cnt, + sparsetree, treeid, tempmem); this->depth_counter = depth_cnt; this->leaf_counter = leaf_cnt; } diff --git a/cpp/src/decisiontree/levelalgo/levelfunc_classifier.cuh b/cpp/src/decisiontree/levelalgo/levelfunc_classifier.cuh index ad26ca93af..0afc55e2ed 100644 --- a/cpp/src/decisiontree/levelalgo/levelfunc_classifier.cuh +++ b/cpp/src/decisiontree/levelalgo/levelfunc_classifier.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2019-2020, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -34,12 +34,10 @@ template void grow_deep_tree_classification( const T* data, const int* labels, unsigned int* rowids, const int Ncols, const float colper, int n_sampled_rows, const int nrows, - const int n_unique_labels, const int nbins, const int maxdepth, - const int maxleaves, const int min_rows_per_node, - const ML::CRITERION split_cr, const int split_algo, - const float min_impurity_decrease, int& depth_cnt, int& leaf_cnt, - std::vector>& sparsetree, const int treeid, - std::shared_ptr> tempmem) { + const int n_unique_labels, + const ML::DecisionTree::DecisionTreeParams& tree_params, int& depth_cnt, + int& leaf_cnt, std::vector>& sparsetree, + const int treeid, std::shared_ptr> tempmem) { const int ncols_sampled = (int)(colper * Ncols); unsigned int* flagsptr = tempmem->d_flags->data(); unsigned int* sample_cnt = tempmem->d_sample_cnt->data(); @@ -47,7 +45,7 @@ void grow_deep_tree_classification( tempmem->stream); std::vector histvec(n_unique_labels, 0); T initial_metric; - if (split_cr == ML::CRITERION::GINI) { + if (tree_params.split_criterion == ML::CRITERION::GINI) { initial_metric_classification(labels, sample_cnt, nrows, n_unique_labels, histvec, initial_metric, tempmem); @@ -56,7 +54,7 @@ void grow_deep_tree_classification( n_unique_labels, histvec, initial_metric, tempmem); } - int reserve_depth = std::min(tempmem->swap_depth, maxdepth); + int reserve_depth = std::min(tempmem->swap_depth, tree_params.max_depth); size_t total_nodes = pow(2, (reserve_depth + 1)) - 1; unsigned int* h_parent_hist = tempmem->h_parent_hist->data(); @@ -104,7 +102,7 @@ void grow_deep_tree_classification( } std::vector feature_selector(h_colids, h_colids + Ncols); - int scatter_algo_depth = std::min(tempmem->swap_depth, maxdepth); + int scatter_algo_depth = std::min(tempmem->swap_depth, tree_params.max_depth); for (int depth = 0; (depth < scatter_algo_depth) && (n_nodes_nextitr != 0); depth++) { depth_cnt = depth + 1; @@ -120,37 +118,40 @@ void 
grow_deep_tree_classification( ncols_sampled, n_nodes, mtg, dist, feature_selector, tempmem, d_rng); get_histogram_classification(data, labels, flagsptr, sample_cnt, nrows, - Ncols, ncols_sampled, n_unique_labels, nbins, - n_nodes, split_algo, tempmem, d_histogram); + Ncols, ncols_sampled, n_unique_labels, + tree_params.n_bins, n_nodes, + tree_params.split_algo, tempmem, d_histogram); float* infogain = tempmem->h_outgain->data(); - if (split_cr == ML::CRITERION::GINI) { + if (tree_params.split_criterion == ML::CRITERION::GINI) { get_best_split_classification( h_histogram, d_histogram, h_colids, d_colids, h_colstart, d_colstart, - Ncols, ncols_sampled, nbins, n_unique_labels, n_nodes, depth, - min_rows_per_node, split_algo, infogain, h_parent_hist, h_child_hist, - sparsetree, sparsesize, sparse_nodelist, h_split_colidx, h_split_binidx, - d_split_colidx, d_split_binidx, tempmem); + Ncols, ncols_sampled, tree_params.n_bins, n_unique_labels, n_nodes, + depth, tree_params.min_rows_per_node, tree_params.split_algo, infogain, + h_parent_hist, h_child_hist, sparsetree, sparsesize, sparse_nodelist, + h_split_colidx, h_split_binidx, d_split_colidx, d_split_binidx, + tempmem); } else { get_best_split_classification( h_histogram, d_histogram, h_colids, d_colids, h_colstart, d_colstart, - Ncols, ncols_sampled, nbins, n_unique_labels, n_nodes, depth, - min_rows_per_node, split_algo, infogain, h_parent_hist, h_child_hist, - sparsetree, sparsesize, sparse_nodelist, h_split_colidx, h_split_binidx, - d_split_colidx, d_split_binidx, tempmem); + Ncols, ncols_sampled, tree_params.n_bins, n_unique_labels, n_nodes, + depth, tree_params.min_rows_per_node, tree_params.split_algo, infogain, + h_parent_hist, h_child_hist, sparsetree, sparsesize, sparse_nodelist, + h_split_colidx, h_split_binidx, d_split_colidx, d_split_binidx, + tempmem); } CUDA_CHECK(cudaStreamSynchronize(tempmem->stream)); - leaf_eval_classification(infogain, depth, min_impurity_decrease, maxdepth, - n_unique_labels, 
maxleaves, h_new_node_flags, - sparsetree, sparsesize, h_parent_hist, - n_nodes_nextitr, sparse_nodelist, leaf_cnt); + leaf_eval_classification( + infogain, depth, tree_params.min_impurity_decrease, tree_params.max_depth, + n_unique_labels, tree_params.max_depth, h_new_node_flags, sparsetree, + sparsesize, h_parent_hist, n_nodes_nextitr, sparse_nodelist, leaf_cnt); MLCommon::updateDevice(d_new_node_flags, h_new_node_flags, n_nodes, tempmem->stream); - make_level_split(data, nrows, Ncols, ncols_sampled, nbins, n_nodes, - split_algo, d_split_colidx, d_split_binidx, - d_new_node_flags, flagsptr, tempmem); + make_level_split(data, nrows, Ncols, ncols_sampled, tree_params.n_bins, + n_nodes, tree_params.split_algo, d_split_colidx, + d_split_binidx, d_new_node_flags, flagsptr, tempmem); CUDA_CHECK(cudaStreamSynchronize(tempmem->stream)); if (depth != (scatter_algo_depth - 1)) { memcpy(h_parent_hist, h_child_hist, @@ -187,26 +188,26 @@ void grow_deep_tree_classification( sparsetree.resize(sparsetree.size() - lastsize); convert_scatter_to_gather(flagsptr, sample_cnt, n_nodes, nrows, d_nodecount, d_nodestart, d_samplelist, tempmem); - for (int depth = tempmem->swap_depth; (depth < maxdepth) && (n_nodes != 0); - depth++) { + for (int depth = tempmem->swap_depth; + (depth < tree_params.max_depth) && (n_nodes != 0); depth++) { depth_cnt = depth + 1; //Algorithm starts here update_feature_sampling(h_colids, d_colids, h_colstart, d_colstart, Ncols, ncols_sampled, lastsize, mtg, dist, feature_selector, tempmem, d_rng); - if (split_cr == ML::CRITERION::GINI) { + if (tree_params.split_criterion == ML::CRITERION::GINI) { best_split_gather_classification( data, labels, d_colids, d_colstart, d_nodestart, d_samplelist, nrows, - Ncols, ncols_sampled, n_unique_labels, nbins, n_nodes, split_algo, - sparsetree.size() + lastsize, min_impurity_decrease, tempmem, - d_sparsenodes, d_nodelist); + Ncols, ncols_sampled, n_unique_labels, tree_params.n_bins, n_nodes, + tree_params.split_algo, 
sparsetree.size() + lastsize, + tree_params.min_impurity_decrease, tempmem, d_sparsenodes, d_nodelist); } else { best_split_gather_classification( data, labels, d_colids, d_colstart, d_nodestart, d_samplelist, nrows, - Ncols, ncols_sampled, n_unique_labels, nbins, n_nodes, split_algo, - sparsetree.size() + lastsize, min_impurity_decrease, tempmem, - d_sparsenodes, d_nodelist); + Ncols, ncols_sampled, n_unique_labels, tree_params.n_bins, n_nodes, + tree_params.split_algo, sparsetree.size() + lastsize, + tree_params.min_impurity_decrease, tempmem, d_sparsenodes, d_nodelist); } MLCommon::updateHost(h_sparsenodes, d_sparsenodes, lastsize, tempmem->stream); @@ -225,7 +226,7 @@ void grow_deep_tree_classification( n_nodes = h_counter[0]; } if (n_nodes != 0) { - if (split_cr == ML::CRITERION::GINI) { + if (tree_params.split_criterion == ML::CRITERION::GINI) { make_leaf_gather_classification( labels, d_nodestart, d_samplelist, n_unique_labels, d_sparsenodes, d_nodelist, n_nodes, tempmem); diff --git a/cpp/src/decisiontree/levelalgo/levelfunc_regressor.cuh b/cpp/src/decisiontree/levelalgo/levelfunc_regressor.cuh index 7377c63aee..cd51519d81 100644 --- a/cpp/src/decisiontree/levelalgo/levelfunc_regressor.cuh +++ b/cpp/src/decisiontree/levelalgo/levelfunc_regressor.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2019-2020, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -35,11 +35,9 @@ template void grow_deep_tree_regression( const T* data, const T* labels, unsigned int* rowids, const int Ncols, const float colper, const int n_sampled_rows, const int nrows, - const int nbins, int maxdepth, const int maxleaves, - const int min_rows_per_node, const ML::CRITERION split_cr, int split_algo, - const float min_impurity_decrease, int& depth_cnt, int& leaf_cnt, - std::vector>& sparsetree, const int treeid, - std::shared_ptr> tempmem) { + const ML::DecisionTree::DecisionTreeParams& tree_params, int& depth_cnt, + int& leaf_cnt, std::vector>& sparsetree, + const int treeid, std::shared_ptr> tempmem) { const int ncols_sampled = (int)(colper * Ncols); unsigned int* flagsptr = tempmem->d_flags->data(); unsigned int* sample_cnt = tempmem->d_sample_cnt->data(); @@ -49,14 +47,14 @@ void grow_deep_tree_regression( T mean; T initial_metric; unsigned int count; - if (split_cr == ML::CRITERION::MSE) { + if (tree_params.split_criterion == ML::CRITERION::MSE) { initial_metric_regression(labels, sample_cnt, nrows, mean, count, initial_metric, tempmem); } else { initial_metric_regression(labels, sample_cnt, nrows, mean, count, initial_metric, tempmem); } - int reserve_depth = std::min(tempmem->swap_depth, maxdepth); + int reserve_depth = std::min(tempmem->swap_depth, tree_params.max_depth); size_t total_nodes = pow(2, (reserve_depth + 1)) - 1; std::vector sparse_meanstate; @@ -112,7 +110,7 @@ void grow_deep_tree_regression( std::vector feature_selector(h_colids, h_colids + Ncols); float* infogain = tempmem->h_outgain->data(); - int scatter_algo_depth = std::min(tempmem->swap_depth, maxdepth); + int scatter_algo_depth = std::min(tempmem->swap_depth, tree_params.max_depth); for (int depth = 0; (depth < scatter_algo_depth) && (n_nodes_nextitr != 0); depth++) { depth_cnt = depth + 1; @@ -130,42 +128,44 @@ void grow_deep_tree_regression( init_parent_value(sparse_meanstate, sparse_countstate, sparse_nodelist, sparsesize, depth, tempmem); - if (split_cr == 
ML::CRITERION::MSE) { - get_mse_regression_fused( - data, labels, flagsptr, sample_cnt, nrows, Ncols, ncols_sampled, nbins, - n_nodes, split_algo, tempmem, d_mseout, d_predout, d_count); + if (tree_params.split_criterion == ML::CRITERION::MSE) { + get_mse_regression_fused(data, labels, flagsptr, sample_cnt, nrows, + Ncols, ncols_sampled, tree_params.n_bins, + n_nodes, tree_params.split_algo, tempmem, + d_mseout, d_predout, d_count); get_best_split_regression>( h_mseout, d_mseout, h_predout, d_predout, h_count, d_count, h_colids, - d_colids, h_colstart, d_colstart, Ncols, ncols_sampled, nbins, n_nodes, - depth, min_rows_per_node, split_algo, sparsesize, infogain, - sparse_meanstate, sparse_countstate, sparsetree, sparse_nodelist, - h_split_colidx, h_split_binidx, d_split_colidx, d_split_binidx, - tempmem); + d_colids, h_colstart, d_colstart, Ncols, ncols_sampled, + tree_params.n_bins, n_nodes, depth, tree_params.min_rows_per_node, + tree_params.split_algo, sparsesize, infogain, sparse_meanstate, + sparse_countstate, sparsetree, sparse_nodelist, h_split_colidx, + h_split_binidx, d_split_colidx, d_split_binidx, tempmem); } else { get_mse_regression( - data, labels, flagsptr, sample_cnt, nrows, Ncols, ncols_sampled, nbins, - n_nodes, split_algo, tempmem, d_mseout, d_predout, d_count); + data, labels, flagsptr, sample_cnt, nrows, Ncols, ncols_sampled, + tree_params.n_bins, n_nodes, tree_params.split_algo, tempmem, d_mseout, + d_predout, d_count); get_best_split_regression>( h_mseout, d_mseout, h_predout, d_predout, h_count, d_count, h_colids, - d_colids, h_colstart, d_colstart, Ncols, ncols_sampled, nbins, n_nodes, - depth, min_rows_per_node, split_algo, sparsesize, infogain, - sparse_meanstate, sparse_countstate, sparsetree, sparse_nodelist, - h_split_colidx, h_split_binidx, d_split_colidx, d_split_binidx, - tempmem); + d_colids, h_colstart, d_colstart, Ncols, ncols_sampled, + tree_params.n_bins, n_nodes, depth, tree_params.min_rows_per_node, + tree_params.split_algo, 
sparsesize, infogain, sparse_meanstate, + sparse_countstate, sparsetree, sparse_nodelist, h_split_colidx, + h_split_binidx, d_split_colidx, d_split_binidx, tempmem); } CUDA_CHECK(cudaStreamSynchronize(tempmem->stream)); - leaf_eval_regression(infogain, depth, min_impurity_decrease, maxdepth, - maxleaves, h_new_node_flags, sparsetree, sparsesize, - sparse_meanstate, n_nodes_nextitr, sparse_nodelist, - leaf_cnt); + leaf_eval_regression( + infogain, depth, tree_params.min_impurity_decrease, tree_params.max_depth, + tree_params.max_leaves, h_new_node_flags, sparsetree, sparsesize, + sparse_meanstate, n_nodes_nextitr, sparse_nodelist, leaf_cnt); MLCommon::updateDevice(d_new_node_flags, h_new_node_flags, n_nodes, tempmem->stream); - make_level_split(data, nrows, Ncols, ncols_sampled, nbins, n_nodes, - split_algo, d_split_colidx, d_split_binidx, - d_new_node_flags, flagsptr, tempmem); + make_level_split(data, nrows, Ncols, ncols_sampled, tree_params.n_bins, + n_nodes, tree_params.split_algo, d_split_colidx, + d_split_binidx, d_new_node_flags, flagsptr, tempmem); } // Start of gather algorithm @@ -198,8 +198,8 @@ void grow_deep_tree_regression( sparsetree.resize(sparsetree.size() - lastsize); convert_scatter_to_gather(flagsptr, sample_cnt, n_nodes, nrows, d_nodecount, d_nodestart, d_samplelist, tempmem); - for (int depth = tempmem->swap_depth; (depth < maxdepth) && (n_nodes != 0); - depth++) { + for (int depth = tempmem->swap_depth; + (depth < tree_params.max_depth) && (n_nodes != 0); depth++) { depth_cnt = depth + 1; //Algorithm starts here update_feature_sampling(h_colids, d_colids, h_colstart, d_colstart, Ncols, @@ -208,9 +208,9 @@ void grow_deep_tree_regression( best_split_gather_regression( data, labels, d_colids, d_colstart, d_nodestart, d_samplelist, nrows, - Ncols, ncols_sampled, nbins, n_nodes, split_algo, split_cr, - sparsetree.size() + lastsize, min_impurity_decrease, tempmem, - d_sparsenodes, d_nodelist); + Ncols, ncols_sampled, tree_params.n_bins, n_nodes, 
tree_params.split_algo, + tree_params.split_criterion, sparsetree.size() + lastsize, + tree_params.min_impurity_decrease, tempmem, d_sparsenodes, d_nodelist); MLCommon::updateHost(h_sparsenodes, d_sparsenodes, lastsize, tempmem->stream); diff --git a/cpp/src/decisiontree/memory.cuh b/cpp/src/decisiontree/memory.cuh index 5aa071f106..ed80c6ec3b 100644 --- a/cpp/src/decisiontree/memory.cuh +++ b/cpp/src/decisiontree/memory.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2019-2020, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -25,35 +25,26 @@ template TemporaryMemory::TemporaryMemory( const std::shared_ptr device_allocator_in, const std::shared_ptr host_allocator_in, - const cudaStream_t stream_in, int N, int Ncols, float colper, int n_unique, - int n_bins, const int split_algo, int depth, bool col_shuffle) { + const cudaStream_t stream_in, int N, int Ncols, int n_unique, + const ML::DecisionTree::DecisionTreeParams& tree_params) { stream = stream_in; - splitalgo = split_algo; - max_shared_mem = MLCommon::getSharedMemPerBlock(); num_sms = MLCommon::getMultiProcessorCount(); device_allocator = device_allocator_in; host_allocator = host_allocator_in; - LevelMemAllocator(N, Ncols, colper, n_unique, n_bins, depth, split_algo, - col_shuffle); + LevelMemAllocator(N, Ncols, n_unique, tree_params); } template -TemporaryMemory::TemporaryMemory(const ML::cumlHandle_impl& handle, - cudaStream_t stream_in, int N, int Ncols, - float colper, int n_unique, int n_bins, - const int split_algo, int depth, - bool col_shuffle) { - //Assign Stream from cumlHandle +TemporaryMemory::TemporaryMemory( + const ML::cumlHandle_impl& handle, cudaStream_t stream_in, int N, int Ncols, + int n_unique, const ML::DecisionTree::DecisionTreeParams& tree_params) { stream = stream_in; - splitalgo = split_algo; - max_shared_mem = 
MLCommon::getSharedMemPerBlock(); num_sms = MLCommon::getMultiProcessorCount(); device_allocator = handle.getDeviceAllocator(); host_allocator = handle.getHostAllocator(); - LevelMemAllocator(N, Ncols, colper, n_unique, n_bins, depth, split_algo, - col_shuffle); + LevelMemAllocator(N, Ncols, n_unique, tree_params); } template @@ -80,18 +71,18 @@ void TemporaryMemory::print_info(int depth, int nrows, int ncols, } template -void TemporaryMemory::LevelMemAllocator(int nrows, int ncols, - float colper, int n_unique, - int nbins, int depth, - const int split_algo, - bool col_shuffle) { +void TemporaryMemory::LevelMemAllocator( + int nrows, int ncols, int n_unique, + const ML::DecisionTree::DecisionTreeParams& tree_params) { + int nbins = tree_params.n_bins; + int depth = tree_params.max_depth; if (depth > swap_depth || (depth == -1)) { max_nodes_per_level = pow(2, swap_depth); } else { max_nodes_per_level = pow(2, depth); } size_t maxnodes = max_nodes_per_level; - size_t ncols_sampled = (size_t)(ncols * colper); + size_t ncols_sampled = (size_t)(ncols * tree_params.max_features); if (depth < 64) { gather_max_nodes = std::min((size_t)(nrows + 1), (size_t)(pow((size_t)2, (size_t)depth) + 1)); @@ -134,7 +125,7 @@ void TemporaryMemory::LevelMemAllocator(int nrows, int ncols, totalmem = 3 * parentsz * sizeof(int) + childsz * sizeof(T) + (nrows + 1) * sizeof(T); - if (split_algo == 0) { + if (tree_params.split_algo == 0) { d_globalminmax = new MLCommon::device_buffer( device_allocator, stream, 2 * maxnodes * ncols_sampled); h_globalminmax = new MLCommon::host_buffer(host_allocator, stream, @@ -149,7 +140,7 @@ void TemporaryMemory::LevelMemAllocator(int nrows, int ncols, } d_sample_cnt = new MLCommon::device_buffer(device_allocator, stream, nrows); - if (col_shuffle == true) { + if (tree_params.shuffle_features == true) { d_colids = new MLCommon::device_buffer( device_allocator, stream, ncols_sampled * gather_max_nodes); h_colids = new MLCommon::host_buffer( @@ -251,7 +242,7 
@@ void TemporaryMemory::LevelMemAllocator(int nrows, int ncols, max_nodes_pred /= 2; // For occupancy purposes. max_nodes_mse /= 2; // For occupancy purposes. } - if (split_algo == ML::SPLIT_ALGO::HIST) { + if (tree_params.split_algo == ML::SPLIT_ALGO::HIST) { size_t shmem_per_node = 2 * sizeof(T); max_nodes_minmax = max_shared_mem / shmem_per_node; max_nodes_minmax /= 2; diff --git a/cpp/src/decisiontree/memory.h b/cpp/src/decisiontree/memory.h index 619a350caf..e4423ea128 100644 --- a/cpp/src/decisiontree/memory.h +++ b/cpp/src/decisiontree/memory.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2019-2020, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,6 +19,7 @@ #include #include #include +#include #include "common/cumlHandle.hpp" template @@ -68,8 +69,6 @@ struct TemporaryMemory { MLCommon::device_buffer *d_colstart = nullptr; MLCommon::host_buffer *h_colids = nullptr; MLCommon::host_buffer *h_colstart = nullptr; - //Split algo - int splitalgo; //For level algorithm MLCommon::device_buffer *d_flags = nullptr; @@ -110,17 +109,21 @@ struct TemporaryMemory { TemporaryMemory( const std::shared_ptr device_allocator_in, const std::shared_ptr host_allocator_in, - const cudaStream_t stream_in, int N, int Ncols, float colper, int n_unique, - int n_bins, const int split_algo, int depth, bool col_shuffle); + const cudaStream_t stream_in, int N, int Ncols, int n_unique, + const ML::DecisionTree::DecisionTreeParams &tree_params); + TemporaryMemory(const ML::cumlHandle_impl &handle, cudaStream_t stream_in, - int N, int Ncols, float colper, int n_unique, int n_bins, - const int split_algo, int depth, bool colshuffle); + int N, int Ncols, int n_unique, + const ML::DecisionTree::DecisionTreeParams &tree_params); + ~TemporaryMemory(); - void LevelMemAllocator(int nrows, int ncols, float colper, int n_unique, - int nbins, int depth, 
const int split_algo, - bool col_shuffle); + + void LevelMemAllocator( + int nrows, int ncols, int n_unique, + const ML::DecisionTree::DecisionTreeParams &tree_params); void LevelMemCleaner(); + void print_info(int depth, int nrows, int ncols, float colper); }; #include "memory.cuh" diff --git a/cpp/src/randomforest/randomforest_impl.cuh b/cpp/src/randomforest/randomforest_impl.cuh index b1cc9accdd..cc6197b5f5 100644 --- a/cpp/src/randomforest/randomforest_impl.cuh +++ b/cpp/src/randomforest/randomforest_impl.cuh @@ -187,12 +187,8 @@ void rfClassifier::fit(const cumlHandle& user_handle, const T* input, std::shared_ptr> tempmem[n_streams]; for (int i = 0; i < n_streams; i++) { tempmem[i] = std::make_shared>( - handle, handle.getInternalStream(i), n_rows, n_cols, - this->rf_params.tree_params.max_features, n_unique_labels, - this->rf_params.tree_params.n_bins, - this->rf_params.tree_params.split_algo, - this->rf_params.tree_params.max_depth, - this->rf_params.tree_params.shuffle_features); + handle, handle.getInternalStream(i), n_rows, n_cols, n_unique_labels, + this->rf_params.tree_params); } //Preprocess once only per forest if ((this->rf_params.tree_params.split_algo == SPLIT_ALGO::GLOBAL_QUANTILE) && @@ -455,12 +451,8 @@ void rfRegressor::fit(const cumlHandle& user_handle, const T* input, std::shared_ptr> tempmem[n_streams]; for (int i = 0; i < n_streams; i++) { tempmem[i] = std::make_shared>( - handle, handle.getInternalStream(i), n_rows, n_cols, - this->rf_params.tree_params.max_features, 1, - this->rf_params.tree_params.n_bins, - this->rf_params.tree_params.split_algo, - this->rf_params.tree_params.max_depth, - this->rf_params.tree_params.shuffle_features); + handle, handle.getInternalStream(i), n_rows, n_cols, 1, + this->rf_params.tree_params); } //Preprocess once only per forest if ((this->rf_params.tree_params.split_algo == SPLIT_ALGO::GLOBAL_QUANTILE) && From 1fe275cb9b24bbad778e2564adae95f181060368 Mon Sep 17 00:00:00 2001 From: Vinay D Date: Wed, 8 Apr 
2020 20:57:01 +0530 Subject: [PATCH 108/330] Fixing file permissions --- cpp/src/decisiontree/decisiontree_impl.h | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100755 => 100644 cpp/src/decisiontree/decisiontree_impl.h diff --git a/cpp/src/decisiontree/decisiontree_impl.h b/cpp/src/decisiontree/decisiontree_impl.h old mode 100755 new mode 100644 From 13fd3b28c95b6beeba6a24a4a1c90affe7d9ae2e Mon Sep 17 00:00:00 2001 From: divyegala Date: Wed, 8 Apr 2020 16:48:49 -0500 Subject: [PATCH 109/330] Revert "using DDH parts_to_sizes" This reverts commit 6d9c861a0cbbd5516b184bf677835bcfba63619c, reversing changes made to 3c98b05a5e358e279c17f6690ca89bd33b87d0e6. --- .gitignore | 3 +- BUILD.md | 2 +- CHANGELOG.md | 6 - build.sh | 20 +-- ci/mg/build.sh | 40 +++--- conda/environments/cuml_dev_cuda10.0.yml | 3 +- conda/environments/cuml_dev_cuda10.1.yml | 3 +- conda/environments/cuml_dev_cuda10.2.yml | 3 +- cpp/README.md | 4 +- cpp/cmake/Dependencies.cmake | 11 +- cpp/cmake/FindClangFormat.cmake | 77 +++++++++++ cpp/cmake/FindClangTidy.cmake | 26 ++++ cpp/scripts/run-clang-format.py | 8 +- .../cuml/dask/neighbors/nearest_neighbors.py | 12 +- .../cuml/neighbors/nearest_neighbors_mg.pyx | 12 +- .../cuml/test/dask/test_nearest_neighbors.py | 5 +- python/cuml/test/test_arima.py | 2 +- python/cuml/test/test_umap.py | 1 - python/setup.py | 130 +++++++----------- python/setuputils.py | 114 ++++++--------- 20 files changed, 255 insertions(+), 227 deletions(-) create mode 100644 cpp/cmake/FindClangFormat.cmake create mode 100644 cpp/cmake/FindClangTidy.cmake diff --git a/.gitignore b/.gitignore index 1e7a42a769..de6e534dc8 100644 --- a/.gitignore +++ b/.gitignore @@ -19,7 +19,6 @@ cuml.egg-info/ dist/ python/cuml/**/*.cpp python/external_repositories -python/record.txt log .ipynb_checkpoints .DS_Store @@ -41,4 +40,4 @@ dask-worker-space/ *.qdrep *.qdrep.cache *.qdstrm -*.nvprof +*.nvprof \ No newline at end of file diff --git a/BUILD.md b/BUILD.md index 
868775e0b1..70de6d009b 100644 --- a/BUILD.md +++ b/BUILD.md @@ -11,7 +11,7 @@ To install cuML from source, ensure the following dependencies are met: 5. Cython (>= 0.29) 6. gcc (>=5.4.0) 7. BLAS - Any BLAS compatible with cmake's [FindBLAS](https://cmake.org/cmake/help/v3.14/module/FindBLAS.html). Note that the blas has to be installed to the same folder system as cmake, for example if using conda installed cmake, the blas implementation should also be installed in the conda environment. -8. clang-format (= 8.0.1) - enforces uniform C++ coding style; required to build cuML from source. The packages `clang=8` and `clang-tools=8` from the conda-forge channel should be sufficient, if you are on conda. If not using conda, install the right version using your OS package manager. +8. clang-format (= 8.0.0) - enforces uniform C++ coding style; required for developers. The RAPIDS conda channel provides a package (`conda install -c rapidsai libclang`). If not using conda, install using your OS package manager. 9. NCCL (>=2.4) 10. UCX [optional] (>= 1.7) - enables point-to-point messaging in the cuML standard communicator. This is necessary for many multi-node multi-GPU cuML algorithms to function. 
diff --git a/CHANGELOG.md b/CHANGELOG.md index 9998d7c650..511fbfe9e2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,11 +8,6 @@ - PR #1927: Use Cython's `new_build_ext` (if available) - PR #1946: Removed zlib dependency from cmake - PR #1981: CumlArray and DistributedDataHandler refactor in NearestNeighbors -- PR #1972: updates to our flow to use conda-forge's clang and clang-tools packages -- PR #1974: Reduce ARIMA testing time -- PR #1984: Enable Ninja build -- PR #2016: Add capability to setup.py and build.sh to fully clean all cython build files and artifacts -- PR #1996: Cythonize in parallel ## Bug Fixes - PR #1939: Fix syntax error in cuml.common.array @@ -20,7 +15,6 @@ - PR #1971: python: Correctly honor --singlegpu option and CUML_BUILD_PATH env variable - PR #1969: Update libcumlprims to 0.14 - PR #1973: Add missing mg files for setup.py --singlegpu flag -- PR #1993: Set `umap_transform_reproducibility` tests to xfail # cuML 0.13.0 (Date TBD) diff --git a/build.sh b/build.sh index 8b9d539c84..c2c1a1c9d1 100755 --- a/build.sh +++ b/build.sh @@ -1,6 +1,6 @@ #!/bin/bash -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019, NVIDIA CORPORATION. # cuml build script @@ -18,7 +18,7 @@ ARGS=$* # script, and that this script resides in the repo dir! REPODIR=$(cd $(dirname $0); pwd) -VALIDARGS="clean libcuml cuml prims bench prims-bench -v -g -n --allgpuarch --singlegpu --nvtx --show_depr_warn -h --help --deep" +VALIDARGS="clean libcuml cuml prims bench prims-bench -v -g -n --allgpuarch --singlegpu --nvtx --show_depr_warn -h --help" HELP="$0 [ ...] [ ...] where is: clean - remove all existing build artifacts and configuration (start over) @@ -33,7 +33,7 @@ HELP="$0 [ ...] [ ...] -g - build for debug -n - no install step --allgpuarch - build for all supported GPU architectures - --singlegpu - Build cuml without libcumlprims based multigpu algorithms. 
+ --singlegpu - Build cuml without multigpu support (multigpu requires libcumlprims) --nvtx - Enable nvtx for profiling support --show_depr_warn - show cmake deprecation warnings -h - print this text @@ -53,7 +53,6 @@ BUILD_ALL_GPU_ARCH=0 SINGLEGPU="" NVTX=OFF CLEAN=0 -DEEPCLEAN=0 BUILD_DISABLE_DEPRECATION_WARNING=ON # Set defaults for vars that may not have been defined externally @@ -115,15 +114,10 @@ if (( ${CLEAN} == 1 )); then # The find removes all contents but leaves the dirs, the rmdir # attempts to remove the dirs but can fail safely. for bd in ${BUILD_DIRS}; do - if [ -d ${bd} ]; then - find ${bd} -mindepth 1 -delete - rmdir ${bd} || true - fi - - cd ${REPODIR}/python - python setup.py clean --all - cd ${REPODIR} - + if [ -d ${bd} ]; then + find ${bd} -mindepth 1 -delete + rmdir ${bd} || true + fi done fi diff --git a/ci/mg/build.sh b/ci/mg/build.sh index 2424f8e554..adbefd22c6 100644 --- a/ci/mg/build.sh +++ b/ci/mg/build.sh @@ -22,10 +22,10 @@ export PATH=/conda/bin:/usr/local/cuda/bin:$PATH export PARALLEL_LEVEL=4 export CUDA_REL=${CUDA_VERSION%.*} -# Parse git describe -cd $WORKSPACE -export GIT_DESCRIBE_TAG=`git describe --tags` -export MINOR_VERSION=`echo $GIT_DESCRIBE_TAG | grep -o -E '([0-9]+\.[0-9]+)'` +# Set versions of packages needed to be grabbed +export CUDF_VERSION=0.8.* +export NVSTRINGS_VERSION=0.8.* +export RMM_VERSION=0.8.* # Set home to the job's workspace export HOME=$WORKSPACE @@ -43,25 +43,19 @@ nvidia-smi logger "Activate conda env..." 
source activate gdf conda install -c conda-forge -c rapidsai -c rapidsai-nightly -c nvidia \ - "cupy>=7,<8.0.0a0" \ - "cudatoolkit=${CUDA_REL}" \ - "cudf=${MINOR_VERSION}" \ - "rmm=${MINOR_VERSION}" \ - "nvstrings=${MINOR_VERSION}" \ - "libcumlprims=${MINOR_VERSION}" \ - "lapack" \ - "cmake==3.14.3" \ - "umap-learn" \ - "protobuf>=3.4.1,<4.0.0" \ - "nccl>=2.5" \ - "dask>=2.12.0" \ - "distributed>=2.12.0" \ - "dask-cudf=${MINOR_VERSION}" \ - "dask-cuda=${MINOR_VERSION}" \ - "ucx-py=${MINOR_VERSION}" \ - "statsmodels" \ - "xgboost====1.0.2dev.rapidsai0.13" \ - "lightgbm" + cudf=${CUDF_VERSION} \ + rmm=${RMM_VERSION} \ + nvstrings=${NVSTRINGS_VERSION} \ + lapack cmake==3.14.3 \ + umap-learn \ + protobuf >=3.4.1,<4.0.0 \ + libclang \ + nccl>=2.4 \ + dask>=2.12.0 \ + distributed>=2.12.0 \ + dask-ml \ + dask-cudf \ + dask-cuda=0.9 logger "Check versions..." python --version diff --git a/conda/environments/cuml_dev_cuda10.0.yml b/conda/environments/cuml_dev_cuda10.0.yml index 6ab783a84f..b95caa572f 100644 --- a/conda/environments/cuml_dev_cuda10.0.yml +++ b/conda/environments/cuml_dev_cuda10.0.yml @@ -6,8 +6,7 @@ channels: - conda-forge dependencies: - cudatoolkit=10.0 -- clang=8.0.1 -- clang-tools=8.0.1 +- libclang=8.0.0 - cmake=3.14.5 - numba>=0.46 - cupy>=7,<8.0.0a0 diff --git a/conda/environments/cuml_dev_cuda10.1.yml b/conda/environments/cuml_dev_cuda10.1.yml index 42c43928f8..fb1c745f9d 100644 --- a/conda/environments/cuml_dev_cuda10.1.yml +++ b/conda/environments/cuml_dev_cuda10.1.yml @@ -6,8 +6,7 @@ channels: - conda-forge dependencies: - cudatoolkit=10.1 -- clang=8.0.1 -- clang-tools=8.0.1 +- libclang=8.0.0 - cmake=3.14.5 - numba>=0.46 - cupy>=7,<8.0.0a0 diff --git a/conda/environments/cuml_dev_cuda10.2.yml b/conda/environments/cuml_dev_cuda10.2.yml index fc627b6e36..5a8099eea4 100644 --- a/conda/environments/cuml_dev_cuda10.2.yml +++ b/conda/environments/cuml_dev_cuda10.2.yml @@ -6,8 +6,7 @@ channels: - conda-forge dependencies: - cudatoolkit=10.2 -- clang=8.0.1 
-- clang-tools=8.0.1 +- libclang=8.0.0 - cmake=3.14.5 - numba>=0.46 - cupy>=7,<8.0.0a0 diff --git a/cpp/README.md b/cpp/README.md index fc1e3de2a3..5f57e6cbb9 100644 --- a/cpp/README.md +++ b/cpp/README.md @@ -16,10 +16,10 @@ The `test` directory has subdirectories that reflect this distinction between th ### Dependencies 1. cmake (>= 3.14) -2. CUDA (>= 10.0) +2. CUDA (>= 9.2) 3. gcc (>=5.4.0) 4. BLAS - Any BLAS compatible with cmake's [FindBLAS](https://cmake.org/cmake/help/v3.14/module/FindBLAS.html). Note that the blas has to be installed to the same folder system as cmake, for example if using conda installed cmake, the blas implementation should also be installed in the conda environment. -5. clang-format (= 8.0.1) - enforces uniform C++ coding style; required to build cuML from source. The packages `clang=8` and `clang-tools=8` from the conda-forge channel should be sufficient, if you are on conda. If not using conda, install the right version using your OS package manager. +5. clang-format (= 8.0.0) - enforces uniform C++ coding style; required to build cuML from source. The RAPIDS conda channel provides a package. If not using conda, install using your OS package manager. 6. UCX with CUDA support [optional] (>=1.7) - enables point-to-point messaging in the cuML communicator. 
### Building cuML: diff --git a/cpp/cmake/Dependencies.cmake b/cpp/cmake/Dependencies.cmake index f4bc3eb5f6..25933288d4 100644 --- a/cpp/cmake/Dependencies.cmake +++ b/cpp/cmake/Dependencies.cmake @@ -59,9 +59,8 @@ ExternalProject_Add(faiss --with-cuda-arch=${FAISS_GPU_ARCHS} -v PREFIX ${FAISS_DIR} - BUILD_COMMAND make -j${PARALLEL_LEVEL} VERBOSE=1 - BUILD_BYPRODUCTS ${FAISS_DIR}/lib/libfaiss.a - INSTALL_COMMAND make -s install > /dev/null + BUILD_COMMAND ${CMAKE_MAKE_PROGRAM} -j${PARALLEL_LEVEL} VERBOSE=1 + INSTALL_COMMAND ${CMAKE_MAKE_PROGRAM} -s install > /dev/null UPDATE_COMMAND "" BUILD_IN_SOURCE 1) ExternalProject_Get_Property(faiss install_dir) @@ -82,9 +81,6 @@ ExternalProject_Add(treelite -DCMAKE_INSTALL_PREFIX= -DENABLE_PROTOBUF=ON -DCMAKE_PREFIX_PATH=${CMAKE_PREFIX_PATH} - BUILD_BYPRODUCTS ${TREELITE_DIR}/lib/libtreelite.a - ${TREELITE_DIR}/lib/libdmlc.a - ${TREELITE_DIR}/lib/libtreelite_runtime.so UPDATE_COMMAND "" PATCH_COMMAND patch -p1 -N < ${CMAKE_CURRENT_SOURCE_DIR}/cmake/treelite_protobuf.patch || true) add_library(dmlclib STATIC IMPORTED) @@ -116,8 +112,6 @@ ExternalProject_Add(googletest CMAKE_ARGS -DCMAKE_INSTALL_PREFIX= -DBUILD_SHARED_LIBS=OFF -DCMAKE_INSTALL_LIBDIR=lib - BUILD_BYPRODUCTS ${GTEST_DIR}/lib/libgtest.a - ${GTEST_DIR}/lib/libgtest_main.a UPDATE_COMMAND "") add_library(gtestlib STATIC IMPORTED) add_library(gtest_mainlib STATIC IMPORTED) @@ -143,7 +137,6 @@ ExternalProject_Add(benchmark -DCMAKE_INSTALL_PREFIX= -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_LIBDIR=lib - BUILD_BYPRODUCTS ${GBENCH_DIR}/lib/libbenchmark.a UPDATE_COMMAND "") add_library(benchmarklib STATIC IMPORTED) add_dependencies(benchmarklib benchmark) diff --git a/cpp/cmake/FindClangFormat.cmake b/cpp/cmake/FindClangFormat.cmake new file mode 100644 index 0000000000..f522273a02 --- /dev/null +++ b/cpp/cmake/FindClangFormat.cmake @@ -0,0 +1,77 @@ +# Copyright (c) 2019, NVIDIA CORPORATION. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Finds clang-format exe based on the PATH env variable +string(REPLACE ":" ";" EnvPath $ENV{PATH}) +find_program(ClangFormat_EXE + NAMES clang-format + PATHS EnvPath + DOC "path to clang-format exe") +find_program(ClangFormat_PY + NAMES run-clang-format.py + PATHS ${PROJECT_SOURCE_DIR}/scripts + DOC "path to run-clang-format python script") + +# Figure out the version of clang-format, if found +if(ClangFormat_EXE) + execute_process(COMMAND ${ClangFormat_EXE} --version + OUTPUT_VARIABLE __cf_version_out + OUTPUT_STRIP_TRAILING_WHITESPACE) + string(REGEX REPLACE + "^clang-format version ([0-9.-]+).*$" "\\1" + ClangFormat_VERSION_STRING + "${__cf_version_out}") +endif() + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(ClangFormat + REQUIRED_VARS ClangFormat_EXE ClangFormat_PY + VERSION_VAR ClangFormat_VERSION_STRING) + +include(CMakeParseArguments) + +set(ClangFormat_TARGET format) + +# clang formatting as a target in the final build stage +function(add_clang_format) + if(ClangFormat_FOUND) + set(options "") + set(oneValueArgs DSTDIR SRCDIR) + set(multiValueArgs "") + cmake_parse_arguments(cf "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + # to flag violations + add_custom_target(${ClangFormat_TARGET} + ALL + COMMAND python + ${ClangFormat_PY} + -dstdir ${cf_DSTDIR} + -exe ${ClangFormat_EXE} + -onlyChangedFiles + COMMENT "Run clang-format on the cpp source files" + 
WORKING_DIRECTORY ${cf_SRCDIR}) + # to fix the flagged violations (only to be run locally!) + add_custom_target(fix-${ClangFormat_TARGET} + COMMAND python + ${ClangFormat_PY} + -dstdir ${cf_DSTDIR} + -exe ${ClangFormat_EXE} + -onlyChangedFiles + -inplace + COMMENT "Run the inplace fix for clang-format flagged violations" + WORKING_DIRECTORY ${cf_SRCDIR}) + else() + message("add_clang_format: clang-format exe not found") + endif() +endfunction(add_clang_format) diff --git a/cpp/cmake/FindClangTidy.cmake b/cpp/cmake/FindClangTidy.cmake new file mode 100644 index 0000000000..8cbcc4238f --- /dev/null +++ b/cpp/cmake/FindClangTidy.cmake @@ -0,0 +1,26 @@ +# Copyright (c) 2019, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Finds clang-tidy exe based on the PATH env variable +string(REPLACE ":" ";" EnvPath $ENV{PATH}) +find_program(ClangTidy_EXE + NAMES clang-tidy + PATHS EnvPath + DOC "path to clang-tidy exe") +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(ClangTidy DEFAULT_MSG + ClangTidy_EXE) + +# TODO: add a clang_tidy dependency on the existing targets diff --git a/cpp/scripts/run-clang-format.py b/cpp/scripts/run-clang-format.py index 4aec7948a6..2957933136 100755 --- a/cpp/scripts/run-clang-format.py +++ b/cpp/scripts/run-clang-format.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019, NVIDIA CORPORATION. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -22,7 +22,6 @@ import tempfile -EXPECTED_VERSION = "8.0.1" VERSION_REGEX = re.compile(r"clang-format version ([0-9.]+)") # NOTE: populate this list with more top-level dirs as we add more of them to # to the cuml repo @@ -68,9 +67,8 @@ def parse_args(): if version is None: raise Exception("Failed to figure out clang-format version!") version = version.group(1) - if version != EXPECTED_VERSION: - raise Exception("clang-format exe must be v%s found '%s'" % \ - (EXPECTED_VERSION, version)) + if version != "8.0.0": + raise Exception("clang-format exe must be v8.0.0 found '%s'" % version) if len(args.dirs) == 0: args.dirs = DEFAULT_DIRS return args diff --git a/python/cuml/dask/neighbors/nearest_neighbors.py b/python/cuml/dask/neighbors/nearest_neighbors.py index 9b7da98afc..48b5c4c6d7 100644 --- a/python/cuml/dask/neighbors/nearest_neighbors.py +++ b/python/cuml/dask/neighbors/nearest_neighbors.py @@ -159,6 +159,14 @@ def _query_models(self, n_neighbors, index_handler.calculate_parts_to_sizes(comms=comms) query_handler.calculate_parts_to_sizes(comms=comms) + idx_parts_to_ranks, idx_M = parts_to_ranks(self.client, + worker_info, + index_handler.gpu_futures) + + query_parts_to_ranks, query_M = parts_to_ranks(self.client, + worker_info, + query_handler.gpu_futures) + """ Invoke kneighbors on Dask workers to perform distributed query """ @@ -171,11 +179,11 @@ def _query_models(self, n_neighbors, worker in index_handler.workers else [], index_handler.total_rows, self.n_cols, - index_handler.parts_to_sizes, + idx_parts_to_ranks, query_handler.worker_to_parts[worker] if worker in query_handler.workers else [], query_handler.total_rows, - query_handler.parts_to_sizes, + query_parts_to_ranks, worker_info[worker]["r"], n_neighbors, key="%s-%s" % (key, idx), diff --git a/python/cuml/neighbors/nearest_neighbors_mg.pyx 
b/python/cuml/neighbors/nearest_neighbors_mg.pyx index 7ad8e9ce90..e592e90ad0 100644 --- a/python/cuml/neighbors/nearest_neighbors_mg.pyx +++ b/python/cuml/neighbors/nearest_neighbors_mg.pyx @@ -203,13 +203,13 @@ def _build_part_inputs(cuda_arr_ifaces, "data": input_ptr, "shape": (n_rows, n_cols)}) - for rank in parts_to_ranks: - for size in parts_to_ranks[rank]: - rsp = malloc(sizeof(RankSizePair)) - rsp.rank = rank - rsp.size = size + for rankSize in parts_to_ranks: + rank, size = rankSize + rsp = malloc(sizeof(RankSizePair)) + rsp.rank = rank + rsp.size = size - vec.push_back(rsp) + vec.push_back(rsp) cdef vector[floatData_t*] *local_parts \ = _build_float_d(arr_ints) diff --git a/python/cuml/test/dask/test_nearest_neighbors.py b/python/cuml/test/dask/test_nearest_neighbors.py index e0eda813aa..c906364c67 100644 --- a/python/cuml/test/dask/test_nearest_neighbors.py +++ b/python/cuml/test/dask/test_nearest_neighbors.py @@ -30,6 +30,9 @@ from sklearn.neighbors import KNeighborsClassifier +from cuml.neighbors.nearest_neighbors_mg import \ + NearestNeighborsMG as cumlNN + from cuml.test.utils import array_equal @@ -197,8 +200,6 @@ def test_default_n_neighbors(cluster): try: from cuml.dask.neighbors import NearestNeighbors as daskNN - from cuml.neighbors.nearest_neighbors_mg import \ - NearestNeighborsMG as cumlNN from sklearn.datasets import make_blobs diff --git a/python/cuml/test/test_arima.py b/python/cuml/test/test_arima.py index 3b8fef0d7b..9bb9fd33ab 100644 --- a/python/cuml/test/test_arima.py +++ b/python/cuml/test/test_arima.py @@ -168,7 +168,7 @@ (1, 0, 1, 1, 1, 1, 4, 0): test_101_111_4, (1, 1, 1, 2, 0, 0, 4, 0): test_111_200_4, (1, 1, 2, 0, 1, 2, 4, 0): test_112_012_4, - # (1, 1, 1, 1, 1, 1, 12, 0): test_111_111_12, + (1, 1, 1, 1, 1, 1, 12, 0): test_111_111_12, } # Dictionary for lazy-loading of datasets diff --git a/python/cuml/test/test_umap.py b/python/cuml/test/test_umap.py index 3b5c626a20..699aee17e1 100644 --- a/python/cuml/test/test_umap.py +++ 
b/python/cuml/test/test_umap.py @@ -314,7 +314,6 @@ def get_embedding(n_components, random_state): @pytest.mark.parametrize('n_components', [2, 25]) @pytest.mark.parametrize('random_state', [None, 8, np.random.RandomState(42)]) -@pytest.mark.xfail(reason="test intermittently fails") def test_umap_transform_reproducibility(n_components, random_state): n_samples = 5000 diff --git a/python/setup.py b/python/setup.py index 02143fe8c4..0196e12a7a 100644 --- a/python/setup.py +++ b/python/setup.py @@ -14,35 +14,31 @@ # limitations under the License. # +from Cython.Build import cythonize from distutils.sysconfig import get_python_lib -from pathlib import Path -from setuptools import find_packages -from setuptools import setup +from setuptools import setup, find_packages from setuptools.extension import Extension -from setuputils import clean_folder from setuputils import get_submodule_dependencies -import numpy +try: + from Cython.Distutils.build_ext import new_build_ext as build_ext +except ImportError: + from setuptools.command.build_ext import build_ext + import os -import shutil +import subprocess import sys import sysconfig import versioneer import warnings - -try: - if "--singlegpu" in sys.argv: - from Cython.Build import cythonize - else: - from Cython.Distutils.build_ext import new_build_ext as build_ext -except ImportError: - from setuptools.command.build_ext import build_ext +import numpy install_requires = [ 'numba', 'cython' ] + ############################################################################## # - Dependencies include and lib folder setup -------------------------------- @@ -54,76 +50,64 @@ cuda_include_dir = os.path.join(CUDA_HOME, "include") cuda_lib_dir = os.path.join(CUDA_HOME, "lib64") -############################################################################## -# - Clean target ------------------------------------------------------------- - -if "clean" in sys.argv: - print("Cleaning all Python and Cython build artifacts...") - - 
treelite_path = "" - libcuml_path = "" - - try: - setup_file_path = str(Path(__file__).parent.absolute()) - shutil.rmtree(setup_file_path + '/build') - shutil.rmtree(setup_file_path + '/.pytest_cache', ignore_errors=True) - shutil.rmtree(setup_file_path + '/external_repositories', - ignore_errors=True) - shutil.rmtree(setup_file_path + '/cuml.egg-info', ignore_errors=True) - shutil.rmtree(setup_file_path + '/__pycache__', ignore_errors=True) - - clean_folder(setup_file_path + '/cuml') - - except IOError: - pass - - # need to terminate script so cythonizing doesn't get triggered after - # cleanup unintendedly - sys.argv.remove("clean") - sys.argv.remove("--all") - - if len(sys.argv) == 1: - sys.exit(0) ############################################################################## -# - Cloning dependencies if needed ------------------------------------------- +# - Subrepo checking and cloning --------------------------------------------- subrepos = [ + 'cub', + 'cutlass', + 'faiss', 'treelite' ] # We check if there is a libcuml++ build folder, by default in cpp/build # or in CUML_BUILD_PATH env variable. 
Otherwise setup.py will clone the -# dependencies defined in cpp/cmake/Dependencies.cmake -if os.environ.get('CUML_BUILD_PATH', False): - libcuml_path = '../' + os.environ.get('CUML_BUILD_PATH') -else: - libcuml_path = '../cpp/build/' +# dependencies defined in cpp/CMakeListst.txt +if "clean" not in sys.argv: + if os.environ.get('CUML_BUILD_PATH', False): + libcuml_path = '../' + os.environ.get('CUML_BUILD_PATH') + else: + libcuml_path = '../cpp/build/' -found_cmake_repos = get_submodule_dependencies(subrepos, - libcuml_path=libcuml_path) + found_cmake_repos = get_submodule_dependencies(subrepos, + libcuml_path=libcuml_path) -if found_cmake_repos: - treelite_path = os.path.join(libcuml_path, - 'treelite/src/treelite/include') -else: - treelite_path = 'external_repositories/treelite/include' + if found_cmake_repos: + treelite_path = os.path.join(libcuml_path, + 'treelite/src/treelite/include') + faiss_path = os.path.join(libcuml_path, 'faiss/src/') + cub_path = os.path.join(libcuml_path, 'cub/src/cub') + cutlass_path = os.path.join(libcuml_path, 'cutlass/src/cutlass') + else: + # faiss requires the include to be to the parent of the root of + # their repo instead of the full path like the others + faiss_path = 'external_repositories/' + treelite_path = 'external_repositories/treelite/include' + cub_path = 'external_repositories/cub' + cutlass_path = 'external_repositories/cutlass' +else: + subprocess.check_call(['rm', '-rf', 'external_repositories']) + treelite_path = "" + faiss_path = "" + cub_path = "" + cutlass_path = "" ############################################################################## # - Cython extensions build and parameters ----------------------------------- -# cumlcomms and nccl are still needed for multigpu algos not based -# on libcumlprims libs = ['cuda', 'cuml++', - 'cumlcomms', - 'nccl', 'rmm'] include_dirs = ['../cpp/src', '../cpp/include', + '../cpp/external', '../cpp/src_prims', + cutlass_path, + cub_path, + faiss_path, treelite_path, 
'../cpp/comms/std/src', '../cpp/comms/std/include', @@ -148,9 +132,13 @@ exc_list.append('cuml/linear_model/ridge_mg.pyx') exc_list.append('cuml/linear_model/linear_regression_mg.pyx') exc_list.append('cuml/neighbors/nearest_neighbors_mg.pyx') - + sys.argv.remove('--singlegpu') else: libs.append('cumlprims') + # ucx/ucx-py related functionality available in version 0.12+ + # libs.append("ucp") + libs.append('cumlcomms') + libs.append('nccl') sys_include = os.path.dirname(sysconfig.get_path("include")) include_dirs.append("%s/cumlprims" % sys_include) @@ -171,21 +159,6 @@ extra_compile_args=['-std=c++11']) ] -for e in extensions: - # TODO: this exclude is not working, need to research way to properly - # exclude files for parallel build. See issue - # https://github.com/rapidsai/cuml/issues/2037 - # e.exclude = exc_list - e.cython_directives = dict( - profile=False, language_level=3, embedsignature=True - ) - -if "--singlegpu" in sys.argv: - print("Full cythonization in parallel is not supported for singlegpu " + - "target for now.") - extensions = cythonize(extensions, - exclude=exc_list) - sys.argv.remove('--singlegpu') ############################################################################## # - Python package generation ------------------------------------------------ @@ -201,7 +174,8 @@ ], author="NVIDIA Corporation", setup_requires=['cython'], - ext_modules=extensions, + ext_modules=cythonize(extensions, + exclude=exc_list), packages=find_packages(include=['cuml', 'cuml.*']), install_requires=install_requires, license="Apache", diff --git a/python/setuputils.py b/python/setuputils.py index 24ee9a9e08..171efe579e 100644 --- a/python/setuputils.py +++ b/python/setuputils.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2018-2020, NVIDIA CORPORATION. +# Copyright (c) 2018, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -14,38 +14,12 @@ # limitations under the License. # -import glob import os import re -import shutil import subprocess import warnings -def clean_folder(path): - """ - Function to clean all Cython and Python artifacts and cache folders. It - clean the folder as well as its direct children recursively. - - Parameters - ---------- - path : String - Path to the folder to be cleaned. - """ - shutil.rmtree(path + '/__pycache__', ignore_errors=True) - - folders = glob.glob(path + '/*/') - for folder in folders: - shutil.rmtree(folder + '/__pycache__', ignore_errors=True) - - clean_folder(folder) - - cython_exts = glob.glob(folder + '/*.cpp') - cython_exts.extend(glob.glob(folder + '/*.cpython*')) - for file in cython_exts: - os.remove(file) - - def clone_repo(name, GIT_REPOSITORY, GIT_TAG, force_clone=False): """ Function to clone repos if they have not been cloned already. @@ -53,17 +27,17 @@ def clone_repo(name, GIT_REPOSITORY, GIT_TAG, force_clone=False): in spite of not being very pythonic. Parameters - ---------- - name : String - Name of the repo to be cloned - GIT_REPOSITORY : String - URL of the repo to be cloned - GIT_TAG : String - commit hash or git hash to be cloned. Accepts anything that - `git checkout` accepts - force_clone : Boolean - Set to True to ignore already cloned repositories in - external_repositories and clone + ---------- + name : String + Name of the repo to be cloned + GIT_REPOSITORY : String + URL of the repo to be cloned + GIT_TAG : String + commit hash or git hash to be cloned. Accepts anything that + `git checkout` accepts + force_clone : Boolean + Set to True to ignore already cloned repositories in + external_repositories and clone """ @@ -92,23 +66,23 @@ def get_repo_cmake_info(names, file_path): Function to find information about submodules from cpp/CMakeLists file Parameters - ---------- - name : List of Strings - List containing the names of the repos to be cloned. 
Must match - the names of the cmake git clone instruction - `ExternalProject_Add(name` - file_path : String - Relative path of the location of the CMakeLists.txt (or the cmake - module which contains ExternalProject_Add definitions) to extract - the information. + ---------- + name : List of Strings + List containing the names of the repos to be cloned. Must match + the names of the cmake git clone instruction + `ExternalProject_Add(name` + file_path : String + Relative path of the location of the CMakeLists.txt (or the cmake + module which contains ExternalProject_Add definitions) to extract + the information. Returns - ------- - results : dictionary - Dictionary where results[name] contains an array, - where results[name][0] is the url of the repo and - repo_info[repo][1] is the tag/commit hash to be cloned as - specified by cmake. + ------- + results : dictionary + Dictionary where results[name] contains an array, + where results[name][0] is the url of the repo and + repo_info[repo][1] is the tag/commit hash to be cloned as + specified by cmake. """ with open(file_path) as f: @@ -139,25 +113,25 @@ def get_submodule_dependencies(repos, repos needed to build the cuML Python package. Parameters - ---------- - repos : List of Strings - List containing the names of the repos to be cloned. Must match - the names of the cmake git clone instruction - `ExternalProject_Add(name` - file_path : String - Relative path of the location of the CMakeLists.txt (or the cmake - module which contains ExternalProject_Add definitions) to extract - the information. By default it will look in the standard location - `cuml_repo_root/cpp` - libcuml_path : String - Relative location of the build folder to look if repositories - already exist + ---------- + repos : List of Strings + List containing the names of the repos to be cloned. 
Must match + the names of the cmake git clone instruction + `ExternalProject_Add(name` + file_path : String + Relative path of the location of the CMakeLists.txt (or the cmake + module which contains ExternalProject_Add definitions) to extract + the information. By default it will look in the standard location + `cuml_repo_root/cpp` + libcuml_path : String + Relative location of the build folder to look if repositories + already exist Returns - ------- - result : boolean - True if repos were found in libcuml cpp build folder, False - if they were not found. + ------- + result : boolean + True if repos were found in libcuml cpp build folder, False + if they were not found. """ repo_info = get_repo_cmake_info(repos, file_path) From 28d101a15e3a605a0ccd65d917b532422fa465ec Mon Sep 17 00:00:00 2001 From: Levs Dolgovs Date: Wed, 8 Apr 2020 14:54:57 -0700 Subject: [PATCH 110/330] addressed review comments, harmonized style --- cpp/include/cuml/fil/fil.h | 4 +-- cpp/src/fil/fil.cu | 45 +++++++++++++++---------- cpp/src/fil/infer.cu | 10 +++--- cpp/test/sg/fil_test.cu | 68 ++++++++++++++++++++++++++------------ 4 files changed, 81 insertions(+), 46 deletions(-) diff --git a/cpp/include/cuml/fil/fil.h b/cpp/include/cuml/fil/fil.h index 94985e1e35..e391cdb134 100644 --- a/cpp/include/cuml/fil/fil.h +++ b/cpp/include/cuml/fil/fil.h @@ -125,11 +125,11 @@ enum leaf_value_t { template struct leaf_output_t {}; template <> -struct leaf_output_t { +struct leaf_output_t { typedef float T; }; template <> -struct leaf_output_t { +struct leaf_output_t { typedef int T; }; diff --git a/cpp/src/fil/fil.cu b/cpp/src/fil/fil.cu index e8a547e31f..fdb9d993bb 100644 --- a/cpp/src/fil/fil.cu +++ b/cpp/src/fil/fil.cu @@ -85,8 +85,9 @@ __global__ void transform_k(float* preds, size_t n, output_t output, result += global_bias; if ((output & output_t::SIGMOID) != 0) result = sigmoid(result); // will not be done on INT_CLASS_LABEL because the whole kernel will not run - if ((output & 
output_t::CLASS) != 0) + if ((output & output_t::CLASS) != 0) { result = result > threshold ? 1.0f : 0.0f; + } // sklearn outputs numpy array in 'C' order, with the number of classes being last dimension // that is also the default order, so we should use the same one if (complement_proba) { @@ -134,28 +135,34 @@ struct forest { params.num_rows = num_rows; params.max_shm = max_shm_; params.num_classes = num_classes_; + params.leaf_payload_type = leaf_payload_type_; + + output_t ot = output_; + if (predict_proba) { + ot = output_t(ot & ~output_t::CLASS); // no threshold on probabilities + } + bool complement_proba = ((ot & output_t::CLASS) == 0) && + (leaf_payload_type_ == leaf_value_t::FLOAT_SCALAR); /** FLOAT_SCALAR means inference produces 1 class score/component and transform_k might complement to 2 for classification, if class probabilities are being requested. assuming predict(..., predict_proba=true) will not get called for regression, hence predict_params::num_outputs == 2 */ params.num_outputs = - predict_proba ? (leaf_payload_type_ == INT_CLASS_LABEL ? num_classes_ : 2) - : 1; - params.leaf_payload_type = leaf_payload_type_; + ((ot & output_t::CLASS) != 0) + ? 1 + : (leaf_payload_type_ == leaf_value_t::INT_CLASS_LABEL ? num_classes_ + : 2); // Predict using the forest. cudaStream_t stream = h.getStream(); infer(params, stream); // Transform the output if necessary. 
- output_t ot = output_; - if (predict_proba) - ot = output_t(ot & ~output_t::CLASS); // no threshold on probabilities - bool complement_proba = predict_proba && leaf_payload_type_ == FLOAT_SCALAR; bool do_transform = ot != output_t::RAW || global_bias_ != 0.0f || complement_proba; - if (leaf_payload_type_ == INT_CLASS_LABEL && !predict_proba) { + if ((leaf_payload_type_ == leaf_value_t::INT_CLASS_LABEL) && + ((ot & output_t::CLASS) != 0)) { // moot since choosing best class and all transforms are monotonic // also, would break current code do_transform = false; @@ -183,7 +190,7 @@ struct forest { output_t output_ = output_t::RAW; float threshold_ = 0.5; float global_bias_ = 0; - leaf_value_t leaf_payload_type_ = FLOAT_SCALAR; + leaf_value_t leaf_payload_type_ = leaf_value_t::FLOAT_SCALAR; int num_classes_ = 0; }; @@ -322,8 +329,8 @@ void check_params(const forest_params_t* params, bool dense) { "output should be a combination of RAW, AVG, SIGMOID and CLASS"); } ASSERT( - params->output & output_t::CLASS || params->num_classes < 2 || - params->leaf_payload_type == INT_CLASS_LABEL, + (params->output & output_t::CLASS || params->num_classes < 2 || + params->leaf_payload_type) == leaf_value_t::INT_CLASS_LABEL, "cannot do two-component regression using FLOAT_SCALAR leaf_payload_type"); } @@ -411,9 +418,10 @@ int find_class_label_from_one_hot(tl::tl_float* vector, int len) { ASSERT(!found_label, "label vector contains multiple 1.0f"); out = i; found_label = true; - } else + } else { ASSERT(vector[i] == 0.0f, "label vector contains values other than 0.0 and 1.0"); + } } ASSERT(found_label, "did not find 1.0f in vector"); return out; @@ -424,18 +432,18 @@ void tl2fil_leaf_payload(fil_node_t* fil_node, const tl::Tree::Node& tl_node, const forest_params_t& forest_params) { auto vec = tl_node.leaf_vector(); switch (forest_params.leaf_payload_type) { - case INT_CLASS_LABEL: + case leaf_value_t::INT_CLASS_LABEL: ASSERT(vec.size() == forest_params.num_classes, "inconsistent 
number of classes in treelite leaves"); fil_node->val.idx = find_class_label_from_one_hot(&vec[0], vec.size()); break; - case FLOAT_SCALAR: + case leaf_value_t::FLOAT_SCALAR: fil_node->val.f = tl_node.leaf_value(); ASSERT(tl_node.leaf_vector().size() == 0, "some but not all treelite leaves have leaf_vector()"); break; default: - ASSERT(false, "unknown leaf_payload_type"); + ASSERT(false, "internal error: invalid leaf_payload_type"); }; } @@ -519,6 +527,7 @@ int tree2fil_sparse(std::vector* pnodes, const tl::Tree& tree, size_t tl_leaf_vector_size(const tl::Model& model) { const tl::Tree& tree = model.trees[0]; + int _ = max_depth(tree); // just checking for cycles int node_key; for (node_key = tree_root(tree); !tl_node_at(tree, node_key).is_leaf(); node_key = tl_node_at(tree, node_key).cright()) @@ -542,9 +551,9 @@ void tl2fil_common(forest_params_t* params, const tl::Model& model, ASSERT(leaf_vec_size == model.num_output_group, "treelite model inconsistent"); params->num_classes = leaf_vec_size; - params->leaf_payload_type = INT_CLASS_LABEL; + params->leaf_payload_type = leaf_value_t::INT_CLASS_LABEL; } else { - params->leaf_payload_type = FLOAT_SCALAR; + params->leaf_payload_type = leaf_value_t::FLOAT_SCALAR; params->num_classes = 0; // ignored } diff --git a/cpp/src/fil/infer.cu b/cpp/src/fil/infer.cu index 834a7ae377..c915ff6df3 100644 --- a/cpp/src/fil/infer.cu +++ b/cpp/src/fil/infer.cu @@ -53,7 +53,7 @@ __device__ __forceinline__ vec infer_one_tree( do { #pragma unroll for (int j = 0; j < NITEMS; ++j) { - if ((mask >> j) & 1 == 0) continue; + //if ((mask & (1 << j)) == 0) continue; auto n = tree[curr[j]]; if (n.is_leaf()) { mask &= ~(1 << j); @@ -218,11 +218,12 @@ struct tree_aggregator_t { } __device__ __forceinline__ void finalize(float* out, int num_rows, int num_outputs) { - if (num_outputs > 1) + if (num_outputs > 1) { // only supporting num_outputs == num_classes finalize_multiple_outputs(out, num_rows); - else + } else { finalize_class_label(out, 
num_rows); + } } }; @@ -307,8 +308,9 @@ void infer_k_launcher(storage_type forest, predict_params params, // given_num_cols is a random large int params.num_cols = params.max_shm / sizeof(float); // since we're crashing, this will not take too long - while (get_smem_footprint<1, leaf_payload_type>(params) > params.max_shm) + while (get_smem_footprint<1, leaf_payload_type>(params) > params.max_shm) { --params.num_cols; + } ASSERT(false, "p.num_cols == %d: too many features, only %d allowed%s", given_num_cols, params.num_cols, leaf_payload_type == INT_CLASS_LABEL diff --git a/cpp/test/sg/fil_test.cu b/cpp/test/sg/fil_test.cu index b46859eb07..5a076230b3 100644 --- a/cpp/test/sg/fil_test.cu +++ b/cpp/test/sg/fil_test.cu @@ -64,7 +64,11 @@ struct FilTestParams { // it's used in treelite ModelBuilder initialization int num_classes; - size_t max_outputs_per_row() { return std::max(num_classes, 2); } + size_t num_proba_outputs() { return num_rows * std::max(num_classes, 2); } + size_t num_preds_outputs() { + return ((output & fil::output_t::CLASS) != 0) ? 
num_rows + : num_proba_outputs(); + } }; std::string output2str(fil::output_t output) { @@ -140,12 +144,13 @@ class BaseFilTest : public testing::TestWithParam { // generate on-GPU random data Random::Rng r(ps.seed); - if (ps.leaf_payload_type == fil::leaf_value_t::FLOAT_SCALAR) + if (ps.leaf_payload_type == fil::leaf_value_t::FLOAT_SCALAR) { r.uniform(weights_d, num_nodes, -1.0f, 1.0f, stream); - else + } else { r.uniform(weights_d, num_nodes, 0.0f, // [0..num_classes) std::nextafterf(ps.num_classes, 0.0f), stream); + } r.uniform(thresholds_d, num_nodes, -1.0f, 1.0f, stream); r.uniformInt(fids_d, num_nodes, 0, ps.num_cols, stream); r.bernoulli(def_lefts_d, num_nodes, 0.5f, stream); @@ -225,19 +230,26 @@ class BaseFilTest : public testing::TestWithParam { } void transform(float f, float& proba, float& output) { - if ((ps.output & fil::output_t::AVG) != 0) f *= (1.0f / ps.num_trees); + if ((ps.output & fil::output_t::AVG) != 0) { + f *= (1.0f / ps.num_trees); + } f += ps.global_bias; - if ((ps.output & fil::output_t::SIGMOID) != 0) f = sigmoid(f); + if ((ps.output & fil::output_t::SIGMOID) != 0) { + f = sigmoid(f); + } proba = f; - if ((ps.output & fil::output_t::CLASS) != 0) + if ((ps.output & fil::output_t::CLASS) != 0) { f = f > ps.threshold ? 
1.0f : 0.0f; + } output = f; } + void complement(float* proba) { proba[0] = 1.0f - proba[1]; } + void predict_on_cpu() { // predict on host - std::vector want_preds_h(ps.num_rows); - std::vector want_proba_h(ps.num_rows * ps.max_outputs_per_row()); + std::vector want_preds_h(ps.num_preds_outputs()); + std::vector want_proba_h(ps.num_proba_outputs()); int num_nodes = tree_num_nodes(); switch (ps.leaf_payload_type) { case fil::leaf_value_t::FLOAT_SCALAR: @@ -247,8 +259,13 @@ class BaseFilTest : public testing::TestWithParam { pred += infer_one_tree(&nodes[j * num_nodes], &data_h[i * ps.num_cols]).f; } - transform(pred, want_proba_h[i * 2 + 1], want_preds_h[i]); - want_proba_h[i * 2] = 1.0f - want_proba_h[i * 2 + 1]; + if ((ps.output & fil::output_t::CLASS) != 0) { + transform(pred, want_proba_h[i * 2 + 1], want_preds_h[i]); + } else { + transform(pred, want_proba_h[i * 2 + 1], want_preds_h[i * 2 + 1]); + complement(&(want_preds_h[i * 2])); + } + complement(&(want_proba_h[i * 2])); } break; case fil::leaf_value_t::INT_CLASS_LABEL: @@ -269,20 +286,28 @@ class BaseFilTest : public testing::TestWithParam { most_votes = pred; best_class = c; } - float _; - transform(pred, want_proba_h[r * ps.num_classes + c], _); + if ((ps.output & fil::output_t::CLASS) != 0) { + float _; + transform(pred, want_proba_h[r * ps.num_classes + c], _); + } else { + transform(pred, want_proba_h[r * ps.num_classes + c], + want_preds_h[r * ps.num_classes + c]); + } + } + if ((ps.output & fil::output_t::CLASS) != 0) { + want_preds_h[r] = best_class; } - want_preds_h[r] = best_class; } break; } // copy to GPU - allocate(want_preds_d, ps.num_rows); - updateDevice(want_preds_d, want_preds_h.data(), ps.num_rows, stream); - allocate(want_proba_d, ps.num_rows * ps.max_outputs_per_row()); - updateDevice(want_proba_d, want_proba_h.data(), - ps.num_rows * ps.max_outputs_per_row(), stream); + allocate(want_preds_d, ps.num_preds_outputs()); + allocate(want_proba_d, ps.num_proba_outputs()); + 
updateDevice(want_preds_d, want_preds_h.data(), ps.num_preds_outputs(), + stream); + updateDevice(want_proba_d, want_proba_h.data(), ps.num_proba_outputs(), + stream); CUDA_CHECK(cudaStreamSynchronize(stream)); } @@ -293,9 +318,9 @@ class BaseFilTest : public testing::TestWithParam { init_forest(&forest); // predict - allocate(preds_d, ps.num_rows); + allocate(preds_d, ps.num_preds_outputs()); + allocate(proba_d, ps.num_proba_outputs()); fil::predict(handle, forest, preds_d, data_d, ps.num_rows); - allocate(proba_d, ps.num_rows * ps.max_outputs_per_row()); fil::predict(handle, forest, proba_d, data_d, ps.num_rows, true); CUDA_CHECK(cudaStreamSynchronize(stream)); @@ -304,8 +329,7 @@ class BaseFilTest : public testing::TestWithParam { } void compare() { - ASSERT_TRUE(devArrMatch(want_proba_d, proba_d, - ps.num_rows * ps.max_outputs_per_row(), + ASSERT_TRUE(devArrMatch(want_proba_d, proba_d, ps.num_proba_outputs(), CompareApprox(ps.tolerance), stream)); float tolerance = ps.leaf_payload_type == fil::leaf_value_t::FLOAT_SCALAR ? ps.tolerance From 284692e5f6434f8499855b152138016932612ca7 Mon Sep 17 00:00:00 2001 From: Vinay D Date: Thu, 9 Apr 2020 13:58:14 +0530 Subject: [PATCH 111/330] Initilized min_impurity_decrease --- cpp/include/cuml/tree/decisiontree.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/include/cuml/tree/decisiontree.hpp b/cpp/include/cuml/tree/decisiontree.hpp index 3efd5cd517..7c9db1b678 100644 --- a/cpp/include/cuml/tree/decisiontree.hpp +++ b/cpp/include/cuml/tree/decisiontree.hpp @@ -68,7 +68,7 @@ struct DecisionTreeParams { /** * Minimum impurity decrease required for spliting a node. If the impurity decrease is below this value, node is leafed out. 
Default is 0.0 */ - float min_impurity_decrease; + float min_impurity_decrease = 0.0f; }; void set_tree_params(DecisionTreeParams ¶ms, int cfg_max_depth = -1, From e059a129b5b42fcbe19a9b98ca09649bd59e3c6d Mon Sep 17 00:00:00 2001 From: wxbn Date: Thu, 9 Apr 2020 16:56:40 +0000 Subject: [PATCH 112/330] Random forest testing speedup --- python/cuml/test/test_random_forest.py | 61 ++++++++++++++------------ 1 file changed, 33 insertions(+), 28 deletions(-) diff --git a/python/cuml/test/test_random_forest.py b/python/cuml/test/test_random_forest.py index c99d850adf..6ceaa35bc6 100644 --- a/python/cuml/test/test_random_forest.py +++ b/python/cuml/test/test_random_forest.py @@ -247,7 +247,7 @@ def test_rf_regression_default(datatype, column_info, nrows): @pytest.mark.parametrize('column_info', [unit_param([20, 10]), quality_param([200, 100]), stress_param([500, 350])]) -@pytest.mark.parametrize('nrows', [unit_param(500), quality_param(5000), +@pytest.mark.parametrize('nrows', [unit_param(250), quality_param(5000), stress_param(500000)]) def test_rf_classification_seed(datatype, column_info, nrows): @@ -259,7 +259,7 @@ def test_rf_classification_seed(datatype, column_info, nrows): y = y.astype(np.int32) X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8, random_state=0) - for i in range(20): + for i in range(8): seed = random.randint(100, 1e5) # Initialize, fit and predict using cuML's # random forest classification model @@ -352,12 +352,9 @@ def test_rf_classification_float64(datatype, column_info, @pytest.mark.parametrize('column_info', [unit_param([20, 10]), quality_param([200, 100]), stress_param([500, 350])]) -@pytest.mark.parametrize('nrows', [unit_param(5000), quality_param(25000), +@pytest.mark.parametrize('nrows', [unit_param(3000), quality_param(25000), stress_param(500000)]) -@pytest.mark.parametrize('convert_dtype', [True, False]) -def test_rf_regression_float64(datatype, column_info, - nrows, convert_dtype): - +def 
test_rf_regression_float64(datatype, column_info, nrows): ncols, n_info = column_info X, y = make_regression(n_samples=nrows, n_features=ncols, n_informative=n_info, @@ -387,17 +384,17 @@ def test_rf_regression_float64(datatype, column_info, assert cu_r2 >= (sk_r2 - 0.09) # predict using cuML's GPU based prediction - if datatype[0] == np.float32 and convert_dtype: + if datatype[0] == np.float32: fil_preds = cuml_model.predict(X_test, predict_model="GPU", - convert_dtype=convert_dtype) + convert_dtype=True) fil_preds = np.reshape(fil_preds, np.shape(cu_preds)) - fil_r2 = r2_score(y_test, fil_preds, convert_dtype=datatype[0]) assert fil_r2 >= (cu_r2 - 0.02) - else: - with pytest.raises(TypeError): - fil_preds = cuml_model.predict(X_test, predict_model="GPU", - convert_dtype=convert_dtype) + + # because datatype[0] != np.float32 or datatype[0] != datatype[1] + with pytest.raises(TypeError): + fil_preds = cuml_model.predict(X_test, predict_model="GPU", + convert_dtype=False) @pytest.mark.parametrize('datatype', [(np.float32, np.float32)]) @@ -551,6 +548,9 @@ def test_rf_classification_sparse(datatype, split_algo, rows_sample, def test_rf_regression_sparse(datatype, split_algo, mode, column_info, max_features, rows_sample, fil_sparse_format, algo): + coverage = 0.3 + if random.random() > coverage: + pytest.skip('Randomly skipping the test') ncols, n_info = column_info use_handle = True @@ -560,7 +560,6 @@ def test_rf_regression_sparse(datatype, split_algo, mode, column_info, X, y = make_regression(n_samples=500, n_features=ncols, n_informative=n_info, random_state=123) - elif mode == 'quality': X, y = fetch_california_housing(return_X_y=True) @@ -631,13 +630,13 @@ def test_rf_regression_sparse(datatype, split_algo, mode, column_info, @pytest.mark.memleak @pytest.mark.parametrize('fil_sparse_format', [True, False, 'auto']) -@pytest.mark.parametrize('column_info', [unit_param([100, 50]), +@pytest.mark.parametrize('column_info', [unit_param([80, 40]), quality_param([200, 
100]), stress_param([500, 350])]) -@pytest.mark.parametrize('nrows', [unit_param(1000), quality_param(50000), +@pytest.mark.parametrize('nrows', [unit_param(800), quality_param(50000), stress_param(500000)]) def test_rf_memory_leakage(fil_sparse_format, column_info, nrows): - n_iter = 30 + n_iter = 3 datatype = np.float32 use_handle = True ncols, n_info = column_info @@ -656,21 +655,27 @@ def test_rf_memory_leakage(fil_sparse_format, column_info, nrows): # before the first call to get_memory_info. base_model = curfc(handle=handle) base_model.fit(X_train, y_train) + handle.sync() # just to be sure free_mem = cuda.current_context().get_memory_info()[0] - rfc_model = curfc(handle=handle) - rfc_model.fit(X_train, y_train) + def test_for_memory_leak(): + cuml_mods = curfc(handle=handle) + cuml_mods.fit(X_train, y_train) + handle.sync() # just to be sure + # Calculate the memory free after fitting the cuML model + delta_mem = free_mem - cuda.current_context().get_memory_info()[0] + assert delta_mem == 0 - # Calculate the memory free after fitting the cuML RF model - delta_mem = free_mem - cuda.current_context().get_memory_info()[0] - cuml_mods = curfc(handle=handle) - cuml_mods.fit(X_train, y_train) + for i in range(3): + cuml_mods.predict(X_test, predict_model="GPU", + fil_sparse_format=fil_sparse_format) + handle.sync() # just to be sure + # Calculate the memory free after predicting the cuML model + delta_mem = free_mem - cuda.current_context().get_memory_info()[0] + assert delta_mem == 0 for i in range(n_iter): - cuml_mods.predict(X_train, predict_model="GPU") - handle.sync() - delta_mem = free_mem - cuda.current_context().get_memory_info()[0] - assert delta_mem == 0 + test_for_memory_leak() @pytest.mark.parametrize('max_features', [1.0, 'auto', 'log2', 'sqrt']) From 998c5490fd565f4b65e5346f5218e916bc02ef6d Mon Sep 17 00:00:00 2001 From: salonijain27 Date: Thu, 9 Apr 2020 12:02:53 -0500 Subject: [PATCH 113/330] updated the predict_proba test --- 
python/cuml/test/dask/test_random_forest.py | 122 +++++++++++++------- 1 file changed, 80 insertions(+), 42 deletions(-) diff --git a/python/cuml/test/dask/test_random_forest.py b/python/cuml/test/dask/test_random_forest.py index 136a87119f..c68f33ea5a 100644 --- a/python/cuml/test/dask/test_random_forest.py +++ b/python/cuml/test/dask/test_random_forest.py @@ -99,11 +99,11 @@ def test_rf_classification_dask(partitions_per_worker, cluster): X_train_df, y_train_df = _prep_training_data(c, X_train, y_train, partitions_per_worker) - cu_rf_mg = cuRFC_mg(**cu_rf_params) - cu_rf_mg.fit(X_train_df, y_train_df) - cu_rf_mg_predict = cu_rf_mg.predict(X_test) + cuml_mod = cuRFC_mg(**cu_rf_params) + cuml_mod.fit(X_train_df, y_train_df) + cuml_mod_predict = cuml_mod.predict(X_test) - acc_score = accuracy_score(cu_rf_mg_predict, y_test, normalize=True) + acc_score = accuracy_score(cuml_mod_predict, y_test, normalize=True) assert acc_score > 0.8 finally: @@ -138,10 +138,10 @@ def test_rf_classification_dask_cudf(partitions_per_worker, cluster): partitions_per_worker) X_test_cudf = cudf.DataFrame.from_gpu_matrix(rmm.to_device(X_test)) - cu_rf_mg = cuRFC_mg(**cu_rf_params) - cu_rf_mg.fit(X_train_df, y_train_df) - cu_rf_mg_predict = cu_rf_mg.predict(X_test_cudf) - acc_score = accuracy_score(cu_rf_mg_predict, y_test, normalize=True) + cuml_mod = cuRFC_mg(**cu_rf_params) + cuml_mod.fit(X_train_df, y_train_df) + cuml_mod_predict = cuml_mod.predict(X_test_cudf) + acc_score = accuracy_score(cuml_mod_predict, y_test, normalize=True) assert acc_score > 0.8 @@ -193,13 +193,13 @@ def test_rf_regression_dask_fil(partitions_per_worker, cluster): X_train_df, y_train_df = dask_utils.persist_across_workers( c, [X_train_df, y_train_df], workers=workers) - cu_rf_mg = cuRFR_mg(**cu_rf_params) - cu_rf_mg.fit(X_train_df, y_train_df) + cuml_mod = cuRFR_mg(**cu_rf_params) + cuml_mod.fit(X_train_df, y_train_df) - cu_rf_mg_predict = cu_rf_mg.predict(X_test_df).compute() - cu_rf_mg_predict = 
cp.asnumpy(cp.array(cu_rf_mg_predict)) + cuml_mod_predict = cuml_mod.predict(X_test_df).compute() + cuml_mod_predict = cp.asnumpy(cp.array(cuml_mod_predict)) - acc_score = r2_score(cu_rf_mg_predict, y_test) + acc_score = r2_score(cuml_mod_predict, y_test) assert acc_score >= 0.67 @@ -237,30 +237,16 @@ def test_rf_classification_dask_fil(partitions_per_worker, cluster, partitions_per_worker) X_test_df, _ = _prep_training_data(c, X_test, y_test, partitions_per_worker) - cu_rf_mg = cuRFC_mg(**cu_rf_params) - cu_rf_mg.fit(X_train_df, y_train_df) - cu_rf_mg_predict = cu_rf_mg.predict(X_test_df, output_class).compute() - cu_rf_mg_predict = cp.asnumpy(cp.array(cu_rf_mg_predict)) + cuml_mod = cuRFC_mg(**cu_rf_params) + cuml_mod.fit(X_train_df, y_train_df) + cuml_mod_predict = cuml_mod.predict(X_test_df, output_class).compute() + cuml_mod_predict = cp.asnumpy(cp.array(cuml_mod_predict)) if not output_class: - cu_rf_mg_predict = np.round(cu_rf_mg_predict) + cuml_mod_predict = np.round(cuml_mod_predict) - fil_preds_proba = cu_rf_mg.predict_proba(X_test_df).compute() - fil_preds_proba = cp.asnumpy(fil_preds_proba.to_gpu_matrix()) - y_proba = np.zeros(np.shape(fil_preds_proba)) - y_proba[:, 1] = y_test - y_proba[:, 0] = 1.0 - y_test - fil_mse = mean_squared_error(y_proba, fil_preds_proba) - sk_model = skrfc(n_estimators=25, - max_depth=13, - random_state=10) - sk_model.fit(X_train, y_train) - sk_preds_proba = sk_model.predict_proba(X_test) - sk_mse = mean_squared_error(y_proba, sk_preds_proba) - - acc_score = accuracy_score(cu_rf_mg_predict, y_test, normalize=True) + acc_score = accuracy_score(cuml_mod_predict, y_test, normalize=True) assert acc_score > 0.8 - assert fil_mse <= (sk_mse + 0.012) finally: c.close() @@ -295,14 +281,14 @@ def test_rf_classification_dask_array(partitions_per_worker, cluster, X_train_df, y_train_df = _prep_training_data(c, X_train, y_train, partitions_per_worker) X_test_dask_array = from_array(X_test) - cu_rf_mg = cuRFC_mg(**cu_rf_params) - 
cu_rf_mg.fit(X_train_df, y_train_df) - cu_rf_mg_predict = cu_rf_mg.predict(X_test_dask_array, + cuml_mod = cuRFC_mg(**cu_rf_params) + cuml_mod.fit(X_train_df, y_train_df) + cuml_mod_predict = cuml_mod.predict(X_test_dask_array, output_class).compute() if not output_class: - cu_rf_mg_predict = np.round(cu_rf_mg_predict) + cuml_mod_predict = np.round(cuml_mod_predict) - acc_score = accuracy_score(cu_rf_mg_predict, y_test, normalize=True) + acc_score = accuracy_score(cuml_mod_predict, y_test, normalize=True) assert acc_score > 0.8 @@ -349,14 +335,66 @@ def test_rf_regression_dask_cpu(partitions_per_worker, cluster): X_train_df, y_train_df = dask_utils.persist_across_workers( c, [X_train_df, y_train_df], workers=workers) - cu_rf_mg = cuRFR_mg(**cu_rf_params) - cu_rf_mg.fit(X_train_df, y_train_df) + cuml_mod = cuRFR_mg(**cu_rf_params) + cuml_mod.fit(X_train_df, y_train_df) - cu_rf_mg_predict = cu_rf_mg.predict(X_test, predict_model='CPU') + cuml_mod_predict = cuml_mod.predict(X_test, predict_model='CPU') - acc_score = r2_score(cu_rf_mg_predict, y_test) + acc_score = r2_score(cuml_mod_predict, y_test) assert acc_score >= 0.67 finally: c.close() + + +@pytest.mark.parametrize('partitions_per_worker', [1]) +@pytest.mark.parametrize('output_class', [True, False]) +def test_rf_classification_dask_fil_predict_proba(partitions_per_worker, + cluster, + output_class): + + # Use CUDA_VISIBLE_DEVICES to control the number of workers + c = Client(threads_per_worker=1, n_workers=1) + + try: + + X, y = make_classification(n_samples=10000, n_features=30, + n_clusters_per_class=1, n_informative=20, + random_state=123, n_classes=2) + + X = X.astype(np.float32) + y = y.astype(np.int32) + + X_train, X_test, y_train, y_test = \ + train_test_split(X, y, test_size=1000) + + cu_rf_params = {'n_bins': 16, 'n_streams': 1, + 'n_estimators': 40, 'max_depth': 16 + } + + X_train_df, y_train_df = _prep_training_data(c, X_train, y_train, + partitions_per_worker) + X_test_df, _ = _prep_training_data(c, 
X_test, y_test, + partitions_per_worker) + cu_rf_mg = cuRFC_mg(**cu_rf_params) + cu_rf_mg.fit(X_train_df, y_train_df) + + fil_preds_proba = cu_rf_mg.predict_proba(X_test_df).compute() + fil_preds_proba = cp.asnumpy(fil_preds_proba.to_gpu_matrix()) + y_proba = np.zeros(np.shape(fil_preds_proba)) + y_proba[:, 1] = y_test + y_proba[:, 0] = 1.0 - y_test + fil_mse = mean_squared_error(y_proba, fil_preds_proba) + sk_model = skrfc(n_estimators=40, + max_depth=16, + min_samples_split=2, + random_state=10) + sk_model.fit(X_train, y_train) + sk_preds_proba = sk_model.predict_proba(X_test) + sk_mse = mean_squared_error(y_proba, sk_preds_proba) + + assert fil_mse <= sk_mse + 0.002 + + finally: + c.close() From 0e0d7bcb45ee271eabd08a7a6727205d26890cdd Mon Sep 17 00:00:00 2001 From: salonijain27 Date: Thu, 9 Apr 2020 12:08:11 -0500 Subject: [PATCH 114/330] updated the dask rf predict proba test --- python/cuml/test/dask/test_random_forest.py | 1 - 1 file changed, 1 deletion(-) diff --git a/python/cuml/test/dask/test_random_forest.py b/python/cuml/test/dask/test_random_forest.py index c68f33ea5a..22bfe41737 100644 --- a/python/cuml/test/dask/test_random_forest.py +++ b/python/cuml/test/dask/test_random_forest.py @@ -388,7 +388,6 @@ def test_rf_classification_dask_fil_predict_proba(partitions_per_worker, fil_mse = mean_squared_error(y_proba, fil_preds_proba) sk_model = skrfc(n_estimators=40, max_depth=16, - min_samples_split=2, random_state=10) sk_model.fit(X_train, y_train) sk_preds_proba = sk_model.predict_proba(X_test) From 19e96107c2f92384c1e423b1659638b7dc18dce0 Mon Sep 17 00:00:00 2001 From: salonijain27 Date: Thu, 9 Apr 2020 12:14:48 -0500 Subject: [PATCH 115/330] updated the dask rf predict proba test threshold --- python/cuml/test/dask/test_random_forest.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/python/cuml/test/dask/test_random_forest.py b/python/cuml/test/dask/test_random_forest.py index 22bfe41737..a1cc65bb35 100644 --- 
a/python/cuml/test/dask/test_random_forest.py +++ b/python/cuml/test/dask/test_random_forest.py @@ -393,7 +393,9 @@ def test_rf_classification_dask_fil_predict_proba(partitions_per_worker, sk_preds_proba = sk_model.predict_proba(X_test) sk_mse = mean_squared_error(y_proba, sk_preds_proba) - assert fil_mse <= sk_mse + 0.002 + # The threshold is required as the test would intermitently + # fail with a max difference of 0.003 between the two mse values + assert fil_mse <= sk_mse + 0.003 finally: c.close() From efd67aa3eb67a8107faf9ced30432c16caef8aa4 Mon Sep 17 00:00:00 2001 From: salonijain27 Date: Thu, 9 Apr 2020 14:01:08 -0500 Subject: [PATCH 116/330] update the code based on reviews --- python/cuml/dask/common/base.py | 23 +++++++++++---------- python/cuml/test/dask/test_random_forest.py | 5 ++--- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/python/cuml/dask/common/base.py b/python/cuml/dask/common/base.py index b0ff04c6fa..f3e0f91127 100644 --- a/python/cuml/dask/common/base.py +++ b/python/cuml/dask/common/base.py @@ -190,34 +190,35 @@ def _run_parallel_func(self, class DelayedPredictionProbaMixin(DelayedParallelFunc): def _predict_proba(self, X, delayed=True, **kwargs): - return self._run_parallel_func(_predict_proba_func, X, 2, delayed, - **kwargs) + return self._run_parallel_func(func=_predict_proba_func, X=X, + n_dims=2, delayed=delayed, **kwargs) class DelayedPredictionMixin(DelayedParallelFunc): def _predict(self, X, delayed=True, **kwargs): - return self._run_parallel_func(_predict_func, X, 1, delayed, + return self._run_parallel_func(func=_predict_func, X=X, + n_dims=1, delayed=delayed, **kwargs) class DelayedTransformMixin(DelayedParallelFunc): def _transform(self, X, n_dims=1, delayed=True, **kwargs): - return self._run_parallel_func(_transform_func, - X, - n_dims, - delayed, + return self._run_parallel_func(func=_transform_func, + X=X, + n_dims=n_dims, + delayed=delayed, **kwargs) class 
DelayedInverseTransformMixin(DelayedParallelFunc): def _inverse_transform(self, X, n_dims=1, delayed=True, **kwargs): - return self._run_parallel_func(_inverse_transform_func, - X, - n_dims, - delayed, + return self._run_parallel_func(func=_inverse_transform_func, + X=X, + n_dims=n_dims, + delayed=delayed, **kwargs) diff --git a/python/cuml/test/dask/test_random_forest.py b/python/cuml/test/dask/test_random_forest.py index a1cc65bb35..211a107571 100644 --- a/python/cuml/test/dask/test_random_forest.py +++ b/python/cuml/test/dask/test_random_forest.py @@ -354,12 +354,11 @@ def test_rf_classification_dask_fil_predict_proba(partitions_per_worker, cluster, output_class): - # Use CUDA_VISIBLE_DEVICES to control the number of workers c = Client(threads_per_worker=1, n_workers=1) try: - X, y = make_classification(n_samples=10000, n_features=30, + X, y = make_classification(n_samples=1000, n_features=30, n_clusters_per_class=1, n_informative=20, random_state=123, n_classes=2) @@ -367,7 +366,7 @@ def test_rf_classification_dask_fil_predict_proba(partitions_per_worker, y = y.astype(np.int32) X_train, X_test, y_train, y_test = \ - train_test_split(X, y, test_size=1000) + train_test_split(X, y, test_size=100) cu_rf_params = {'n_bins': 16, 'n_streams': 1, 'n_estimators': 40, 'max_depth': 16 From 665f03b35159cb0ad4781389f064bc8883a6128d Mon Sep 17 00:00:00 2001 From: salonijain27 Date: Thu, 9 Apr 2020 17:13:57 -0500 Subject: [PATCH 117/330] reduce the number pf tests and dataset size --- python/cuml/test/dask/test_pca.py | 12 ++++++------ python/cuml/test/dask/test_tsvd.py | 16 +++++++--------- 2 files changed, 13 insertions(+), 15 deletions(-) diff --git a/python/cuml/test/dask/test_pca.py b/python/cuml/test/dask/test_pca.py index 4c91894538..ddf8523b3c 100644 --- a/python/cuml/test/dask/test_pca.py +++ b/python/cuml/test/dask/test_pca.py @@ -20,7 +20,7 @@ @pytest.mark.mg -@pytest.mark.parametrize("nrows", [6e5]) +@pytest.mark.parametrize("nrows", [1000]) 
@pytest.mark.parametrize("ncols", [20]) @pytest.mark.parametrize("n_parts", [67]) def test_pca_fit(nrows, ncols, n_parts, cluster): @@ -35,7 +35,7 @@ def test_pca_fit(nrows, ncols, n_parts, cluster): from cuml.dask.datasets import make_blobs X_cudf, _ = make_blobs(nrows, ncols, 1, n_parts, - cluster_std=0.5, verbose=False, + cluster_std=1.5, verbose=False, random_state=10, dtype=np.float32) wait(X_cudf) @@ -71,8 +71,8 @@ def test_pca_fit(nrows, ncols, n_parts, cluster): @pytest.mark.mg -@pytest.mark.parametrize("nrows", [4e3, 7e5]) -@pytest.mark.parametrize("ncols", [100, 1000]) +@pytest.mark.parametrize("nrows", [1000]) +@pytest.mark.parametrize("ncols", [20]) @pytest.mark.parametrize("n_parts", [46]) def test_pca_fit_transform_fp32(nrows, ncols, n_parts, cluster): @@ -96,8 +96,8 @@ def test_pca_fit_transform_fp32(nrows, ncols, n_parts, cluster): @pytest.mark.mg -@pytest.mark.parametrize("nrows", [7e5]) -@pytest.mark.parametrize("ncols", [200]) +@pytest.mark.parametrize("nrows", [1000]) +@pytest.mark.parametrize("ncols", [20]) @pytest.mark.parametrize("n_parts", [33]) def test_pca_fit_transform_fp64(nrows, ncols, n_parts, cluster): diff --git a/python/cuml/test/dask/test_tsvd.py b/python/cuml/test/dask/test_tsvd.py index 6551ea9547..84d1d372f3 100644 --- a/python/cuml/test/dask/test_tsvd.py +++ b/python/cuml/test/dask/test_tsvd.py @@ -22,11 +22,11 @@ @pytest.mark.mg -@pytest.mark.parametrize("nrows", [unit_param(6e5), +@pytest.mark.parametrize("nrows", [unit_param(1000), stress_param(5e6)]) @pytest.mark.parametrize("ncols", [unit_param(20), stress_param(1000)]) -@pytest.mark.parametrize("n_parts", [unit_param(67)]) +@pytest.mark.parametrize("n_parts", [unit_param(30)]) def test_pca_fit(nrows, ncols, n_parts, cluster): client = Client(cluster) @@ -39,7 +39,7 @@ def test_pca_fit(nrows, ncols, n_parts, cluster): from cuml.dask.datasets import make_blobs X_cudf, _ = make_blobs(nrows, ncols, 1, n_parts, - cluster_std=0.5, verbose=False, + cluster_std=1.5, 
verbose=False, random_state=10, dtype=np.float32) wait(X_cudf) @@ -71,11 +71,9 @@ def test_pca_fit(nrows, ncols, n_parts, cluster): @pytest.mark.mg -@pytest.mark.parametrize("nrows", [unit_param(4e3), - unit_param(7e5), +@pytest.mark.parametrize("nrows", [unit_param(1000), stress_param(9e6)]) -@pytest.mark.parametrize("ncols", [unit_param(100), - unit_param(1000), +@pytest.mark.parametrize("ncols", [unit_param(20), stress_param(5000)]) @pytest.mark.parametrize("n_parts", [46]) def test_pca_fit_transform_fp32(nrows, ncols, n_parts, cluster): @@ -100,9 +98,9 @@ def test_pca_fit_transform_fp32(nrows, ncols, n_parts, cluster): @pytest.mark.mg -@pytest.mark.parametrize("nrows", [unit_param(7e5), +@pytest.mark.parametrize("nrows", [unit_param(1000), stress_param(9e6)]) -@pytest.mark.parametrize("ncols", [unit_param(200), +@pytest.mark.parametrize("ncols", [unit_param(20), stress_param(5000)]) @pytest.mark.parametrize("n_parts", [unit_param(33)]) def test_pca_fit_transform_fp64(nrows, ncols, n_parts, cluster): From 1711bbfe632cd0dee1af42ceb6651e4b79fd5d2f Mon Sep 17 00:00:00 2001 From: divyegala Date: Fri, 10 Apr 2020 01:20:25 -0500 Subject: [PATCH 118/330] review changes --- .../cuml/dask/neighbors/nearest_neighbors.py | 19 ++++++---- .../cuml/neighbors/kneighbors_classifier.pyx | 25 +------------ .../cuml/neighbors/kneighbors_regressor.pyx | 20 ---------- python/cuml/neighbors/nearest_neighbors.pyx | 30 +++------------ .../cuml/neighbors/nearest_neighbors_mg.pyx | 2 +- .../cuml/test/test_kneighbors_classifier.py | 37 +++++++++++++------ python/cuml/test/test_kneighbors_regressor.py | 20 ++++++---- 7 files changed, 58 insertions(+), 95 deletions(-) diff --git a/python/cuml/dask/neighbors/nearest_neighbors.py b/python/cuml/dask/neighbors/nearest_neighbors.py index b1332b7131..b98c392f85 100644 --- a/python/cuml/dask/neighbors/nearest_neighbors.py +++ b/python/cuml/dask/neighbors/nearest_neighbors.py @@ -13,9 +13,12 @@ # limitations under the License. 
# -from cuml.dask.common import extract_ddf_partitions, \ - workers_to_parts, parts_to_ranks, raise_exception_from_futures, \ - flatten_grouped_results, raise_mg_import_exception +from cuml.dask.common import extract_ddf_partitions +from cuml.dask.common import workers_to_parts +from cuml.dask.common import parts_to_ranks +rfrom cuml.dask.common import aise_exception_from_futures +from cuml.dask.common import flatten_grouped_results +from cuml.dask.common import raise_mg_import_exception from cuml.dask.common.base import BaseEstimator from dask.distributed import default_client @@ -235,7 +238,7 @@ def kneighbors(self, X=None, n_neighbors=None, return_distance=True, query_handler = self.X_handler if X is None else \ DistributedDataHandler.create(data=X, client=self.client) - if X is None: + if query_handler is None: raise ValueError("Model needs to be trained using fit() " "before calling kneighbors()") @@ -264,10 +267,10 @@ def kneighbors(self, X=None, n_neighbors=None, return_distance=True, ret = nn_fit, out_i_futures if not return_distance else \ (nn_fit, out_d_futures, out_i_futures) else: - # TODO: Remove hard-code once DataDistributedHandler is used - ret = to_output(out_i_futures, 'float32') \ + ret = to_output(out_i_futures, self.datatype) \ if not return_distance else (to_output(out_d_futures, - 'float32'), to_output( - out_i_futures, 'float32')) + self.datatype), to_output( + out_i_futures, + self.datatype)) return ret diff --git a/python/cuml/neighbors/kneighbors_classifier.pyx b/python/cuml/neighbors/kneighbors_classifier.pyx index 22883ebc7c..5850b41da2 100644 --- a/python/cuml/neighbors/kneighbors_classifier.pyx +++ b/python/cuml/neighbors/kneighbors_classifier.pyx @@ -36,6 +36,7 @@ from cython.operator cimport dereference as deref from cuml.common.handle cimport cumlHandle from libcpp.vector cimport vector +from cuml.utils import with_cupy_rmm from libcpp cimport bool from libcpp.memory cimport shared_ptr @@ -145,23 +146,6 @@ class 
KNeighborsClassifier(NearestNeighbors): raise ValueError("Only uniform weighting strategy is " "supported currently.") - def __getstate__(self): - state = self.__dict__.copy() - - del state['handle'] - - # Only need to store index if fit() was called - if self.n_indices == 1: - state['y'] = self.y - state['X_m'] = self.X_m - return state - - def __setstate__(self, state): - super(NearestNeighbors, self).__init__(handle=None, - verbose=state['verbose']) - - self.__dict__.update(state) - def fit(self, X, y, convert_dtype=True): """ Fit a GPU index for k-nearest neighbors classifier model. @@ -250,11 +234,9 @@ class KNeighborsClassifier(NearestNeighbors): self.handle.sync() - del knn_indices - del inds - return classes.to_output(out_type) + @with_cupy_rmm def predict_proba(self, X, convert_dtype=True): """ Use the trained k-nearest neighbors classifier to @@ -318,9 +300,6 @@ class KNeighborsClassifier(NearestNeighbors): self.handle.sync() - del knn_indices - del inds - final_classes = [] for out_class in out_classes: final_classes.append(out_class.to_output(out_type)) diff --git a/python/cuml/neighbors/kneighbors_regressor.pyx b/python/cuml/neighbors/kneighbors_regressor.pyx index 910e07c3e6..e9bf1a3792 100644 --- a/python/cuml/neighbors/kneighbors_regressor.pyx +++ b/python/cuml/neighbors/kneighbors_regressor.pyx @@ -151,23 +151,6 @@ class KNeighborsRegressor(NearestNeighbors): raise ValueError("Only uniform weighting strategy " "is supported currently.") - def __getstate__(self): - state = self.__dict__.copy() - - del state['handle'] - - # Only need to store index if fit() was called - if self.n_indices == 1: - state['y'] = self.y - state['X_m'] = self.X_m - return state - - def __setstate__(self, state): - super(NearestNeighbors, self).__init__(handle=None, - verbose=state['verbose']) - - self.__dict__.update(state) - def fit(self, X, y, convert_dtype=True): """ Fit a GPU index for k-nearest neighbors regression model. 
@@ -253,9 +236,6 @@ class KNeighborsRegressor(NearestNeighbors): self.handle.sync() - del knn_indices - del inds - return results.to_output(out_type) def score(self, X, y, convert_dtype=True): diff --git a/python/cuml/neighbors/nearest_neighbors.pyx b/python/cuml/neighbors/nearest_neighbors.pyx index b78572f6f6..321fa9dd5d 100644 --- a/python/cuml/neighbors/nearest_neighbors.pyx +++ b/python/cuml/neighbors/nearest_neighbors.pyx @@ -170,9 +170,11 @@ class NearestNeighbors(Base): verbose=False, handle=None, algorithm="brute", - metric="euclidean"): + metric="euclidean", + output_type=None): - super(NearestNeighbors, self).__init__(handle, verbose) + super(NearestNeighbors, self).__init__(handle=handle, verbose=verbose, + output_type=output_type) if metric != "euclidean": raise ValueError("Only Euclidean (euclidean) " @@ -183,23 +185,6 @@ class NearestNeighbors(Base): self.metric = metric self.algorithm = algorithm - def __getstate__(self): - state = self.__dict__.copy() - - del state['handle'] - - # Only need to store index if fit() was called - if self.n_indices == 1: - state['X_m'] = self.X_m - - return state - - def __setstate__(self, state): - super(NearestNeighbors, self).__init__(handle=None, - verbose=state['verbose']) - - self.__dict__.update(state) - def fit(self, X, convert_dtype=True): """ Fit GPU index for performing nearest neighbor queries. @@ -216,6 +201,8 @@ class NearestNeighbors(Base): convert the inputs to np.float32. 
""" + self._set_output_type(X) + if len(X.shape) != 2: raise ValueError("data should be two dimensional") @@ -324,10 +311,5 @@ class NearestNeighbors(Base): False ) - del X_m - - del inputs - del sizes - return (D_ndarr.to_output(out_type), I_ndarr.to_output(out_type)) \ if return_distance else I_ndarr.to_output(out_type) diff --git a/python/cuml/neighbors/nearest_neighbors_mg.pyx b/python/cuml/neighbors/nearest_neighbors_mg.pyx index e592e90ad0..0a8e22c77b 100644 --- a/python/cuml/neighbors/nearest_neighbors_mg.pyx +++ b/python/cuml/neighbors/nearest_neighbors_mg.pyx @@ -267,7 +267,7 @@ class NearestNeighborsMG(NearestNeighbors): ------- output indices, output distances """ - + self._set_output_type(indices[0]) out_type = self._get_output_type(queries[0]) n_neighbors = self.n_neighbors if n_neighbors is None else n_neighbors diff --git a/python/cuml/test/test_kneighbors_classifier.py b/python/cuml/test/test_kneighbors_classifier.py index 2137ec6fd0..07fdf60c05 100644 --- a/python/cuml/test/test_kneighbors_classifier.py +++ b/python/cuml/test/test_kneighbors_classifier.py @@ -29,6 +29,7 @@ from cuml.test.utils import array_equal import pandas as pd +import cupy as cp @pytest.mark.parametrize("datatype", ["dataframe", "numpy"]) @@ -173,43 +174,53 @@ def test_nonmonotonic_labels(): assert array_equal(p.astype(np.int32), y) -@pytest.mark.parametrize("datatype", ["dataframe", "numpy"]) -def test_predict_multioutput(datatype): +@pytest.mark.parametrize("input_type", ["cudf", "numpy", "cupy"]) +@pytest.mark.parametrize("output_type", ["cudf", "numpy", "cupy"]) +def test_predict_multioutput(input_type, output_type): X = np.array([[0, 0, 1], [1, 0, 1]]).astype(np.float32) y = np.array([[15, 2], [5, 4]]).astype(np.int32) - if datatype == "dataframe": + if input_type == "cudf": X = cudf.DataFrame.from_gpu_matrix(rmm.to_device(X)) y = cudf.DataFrame.from_gpu_matrix(rmm.to_device(y)) + elif input_type == "cupy": + X = cp.asarray(X) + y = cp.asarray(y) - knn_cu = 
cuKNN(n_neighbors=1) + knn_cu = cuKNN(n_neighbors=1, output_type=output_type) knn_cu.fit(X, y) p = knn_cu.predict(X) - if datatype == "dataframe": + if output_type == "cudf": assert isinstance(p, cudf.DataFrame) - else: + elif output_type == "numpy": assert isinstance(p, np.ndarray) + elif output_type == "cupy": + assert isinstance(p, cp.core.core.ndarray) assert array_equal(p.astype(np.int32), y) -@pytest.mark.parametrize("datatype", ["dataframe", "numpy"]) -def test_predict_proba_multioutput(datatype): +@pytest.mark.parametrize("input_type", ["cudf", "numpy", "cupy"]) +@pytest.mark.parametrize("output_type", ["cudf", "numpy", "cupy"]) +def test_predict_proba_multioutput(input_type, output_type): X = np.array([[0, 0, 1], [1, 0, 1]]).astype(np.float32) y = np.array([[15, 2], [5, 4]]).astype(np.int32) - if datatype == "dataframe": + if input_type == "cudf": X = cudf.DataFrame.from_gpu_matrix(rmm.to_device(X)) y = cudf.DataFrame.from_gpu_matrix(rmm.to_device(y)) + elif input_type == "cupy": + X = cp.asarray(X) + y = cp.asarray(y) expected = (np.array([[0., 1.], [1., 0.]]).astype(np.float32), np.array([[1., 0.], [0., 1.]]).astype(np.float32)) - knn_cu = cuKNN(n_neighbors=1) + knn_cu = cuKNN(n_neighbors=1, output_type=output_type) knn_cu.fit(X, y) p = knn_cu.predict_proba(X) @@ -217,10 +228,12 @@ def test_predict_proba_multioutput(datatype): assert isinstance(p, tuple) for i in p: - if datatype == "dataframe": + if output_type == "cudf": assert isinstance(i, cudf.DataFrame) - else: + elif output_type == "numpy": assert isinstance(i, np.ndarray) + elif output_type == "cupy": + assert isinstance(i, cp.core.core.ndarray) assert array_equal(p[0].astype(np.float32), expected[0]) assert array_equal(p[1].astype(np.float32), expected[1]) diff --git a/python/cuml/test/test_kneighbors_regressor.py b/python/cuml/test/test_kneighbors_regressor.py index ba7b4600a3..6549ee5497 100644 --- a/python/cuml/test/test_kneighbors_regressor.py +++ 
b/python/cuml/test/test_kneighbors_regressor.py @@ -103,24 +103,30 @@ def test_score(nrows, ncols, n_neighbors, n_clusters, datatype): assert knn_cu.score(X, y) >= 0.9999 -@pytest.mark.parametrize("datatype", ["dataframe", "numpy"]) -def test_predict_multioutput(datatype): +@pytest.mark.parametrize("input_type", ["cudf", "numpy", "cupy"]) +@pytest.mark.parametrize("output_type", ["cudf", "numpy", "cupy"]) +def test_predict_multioutput(input_type, output_type): X = np.array([[0, 0, 1], [1, 0, 1]]).astype(np.float32) - y = np.array([[15.0, 2.0], [5.0, 4.0]]).astype(np.int32) + y = np.array([[15, 2], [5, 4]]).astype(np.int32) - if datatype == "dataframe": + if input_type == "cudf": X = cudf.DataFrame.from_gpu_matrix(rmm.to_device(X)) y = cudf.DataFrame.from_gpu_matrix(rmm.to_device(y)) + elif input_type == "cupy": + X = cp.asarray(X) + y = cp.asarray(y) - knn_cu = cuKNN(n_neighbors=1) + knn_cu = cuKNN(n_neighbors=1, output_type=output_type) knn_cu.fit(X, y) p = knn_cu.predict(X) - if datatype == "dataframe": + if output_type == "cudf": assert isinstance(p, cudf.DataFrame) - else: + elif output_type == "numpy": assert isinstance(p, np.ndarray) + elif output_type == "cupy": + assert isinstance(p, cp.core.core.ndarray) assert array_equal(p.astype(np.int32), y) From 593963f1c3e7adf0afda80e0ae81c489af740008 Mon Sep 17 00:00:00 2001 From: wxbn Date: Fri, 10 Apr 2020 08:24:07 +0000 Subject: [PATCH 119/330] Measuring time --- CHANGELOG.md | 1 + ci/gpu/build.sh | 4 +++- python/cuml/test/test_random_forest.py | 2 +- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1018df8f77..b350e956d5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,7 @@ - PR #1996: Cythonize in parallel - PR #2031: Encapsulating UCX-py interactions in singleton - PR #2029: Add C++ ARIMA log-likelihood benchmark +- PR #2048: Random forest testing speedup ## Bug Fixes - PR #1939: Fix syntax error in cuml.common.array diff --git a/ci/gpu/build.sh 
b/ci/gpu/build.sh index 9125c03b60..4bc15109e4 100644 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -128,7 +128,9 @@ GTEST_OUTPUT="xml:${WORKSPACE}/test-results/libcuml_cpp/" ./test/ml logger "Python pytest for cuml..." cd $WORKSPACE/python -pytest --cache-clear --junitxml=${WORKSPACE}/junit-cuml.xml -v -s -m "not memleak" +pytest --durations=0 --cache-clear --junitxml=${WORKSPACE}/junit-cuml.xml -v -s -m "not memleak" cuml/test/test_random_forest.py +pytest --durations=0 --cache-clear --junitxml=${WORKSPACE}/junit-cuml.xml -v -s -m "not memleak" cuml/test/test_random_forest.py +pytest --durations=0 --cache-clear --junitxml=${WORKSPACE}/junit-cuml.xml -v -s -m "not memleak" cuml/test/test_random_forest.py ################################################################################ # TEST - Run GoogleTest for ml-prims diff --git a/python/cuml/test/test_random_forest.py b/python/cuml/test/test_random_forest.py index 6ceaa35bc6..a71d25d096 100644 --- a/python/cuml/test/test_random_forest.py +++ b/python/cuml/test/test_random_forest.py @@ -548,7 +548,7 @@ def test_rf_classification_sparse(datatype, split_algo, rows_sample, def test_rf_regression_sparse(datatype, split_algo, mode, column_info, max_features, rows_sample, fil_sparse_format, algo): - coverage = 0.3 + coverage = 0.6 if random.random() > coverage: pytest.skip('Randomly skipping the test') From 4e503dec59f7400a0e5dd2fa8e3fe5716e0f89ac Mon Sep 17 00:00:00 2001 From: divyegala Date: Fri, 10 Apr 2020 12:12:25 -0500 Subject: [PATCH 120/330] stupid mistakes --- python/cuml/dask/neighbors/nearest_neighbors.py | 2 +- python/cuml/test/test_kneighbors_regressor.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/python/cuml/dask/neighbors/nearest_neighbors.py b/python/cuml/dask/neighbors/nearest_neighbors.py index b98c392f85..03def26050 100644 --- a/python/cuml/dask/neighbors/nearest_neighbors.py +++ b/python/cuml/dask/neighbors/nearest_neighbors.py @@ -16,7 +16,7 @@ from cuml.dask.common 
import extract_ddf_partitions from cuml.dask.common import workers_to_parts from cuml.dask.common import parts_to_ranks -rfrom cuml.dask.common import aise_exception_from_futures +from cuml.dask.common import raise_exception_from_futures from cuml.dask.common import flatten_grouped_results from cuml.dask.common import raise_mg_import_exception from cuml.dask.common.base import BaseEstimator diff --git a/python/cuml/test/test_kneighbors_regressor.py b/python/cuml/test/test_kneighbors_regressor.py index 6549ee5497..dbeee88146 100644 --- a/python/cuml/test/test_kneighbors_regressor.py +++ b/python/cuml/test/test_kneighbors_regressor.py @@ -31,6 +31,8 @@ from cuml.test.utils import array_equal +import cupy as cp + def test_kneighbors_regressor(n_samples=40, n_features=5, From 274ab41106c920f0ac47be75fd665b600283983e Mon Sep 17 00:00:00 2001 From: divyegala Date: Fri, 10 Apr 2020 12:53:55 -0500 Subject: [PATCH 121/330] changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index f44646f2c4..ae47de1d55 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ - PR #1996: Cythonize in parallel - PR #2031: Encapsulating UCX-py interactions in singleton - PR #2029: Add C++ ARIMA log-likelihood benchmark +- PR #1981: Using CumlArray in kNN and DistributedDataHandler in dask kNN ## Bug Fixes - PR #1939: Fix syntax error in cuml.common.array From 0398661ce5a7cd4fcab3611a9bad3bea0ee9077a Mon Sep 17 00:00:00 2001 From: divyegala Date: Fri, 10 Apr 2020 13:20:40 -0500 Subject: [PATCH 122/330] style fix --- .../cuml/dask/neighbors/nearest_neighbors.py | 20 +++++++++---------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/python/cuml/dask/neighbors/nearest_neighbors.py b/python/cuml/dask/neighbors/nearest_neighbors.py index 03def26050..5014ae5a2b 100644 --- a/python/cuml/dask/neighbors/nearest_neighbors.py +++ b/python/cuml/dask/neighbors/nearest_neighbors.py @@ -13,15 +13,12 @@ # limitations under the License. 
# -from cuml.dask.common import extract_ddf_partitions -from cuml.dask.common import workers_to_parts from cuml.dask.common import parts_to_ranks from cuml.dask.common import raise_exception_from_futures -from cuml.dask.common import flatten_grouped_results +from cuml.dask.common import flatten_grouped_results from cuml.dask.common import raise_mg_import_exception from cuml.dask.common.base import BaseEstimator -from dask.distributed import default_client from cuml.dask.common.comms import worker_state, CommsContext from dask.distributed import wait from cuml.dask.common.input_utils import to_output @@ -64,7 +61,8 @@ def fit(self, X): ------- self: NearestNeighbors model """ - self.X_handler = DistributedDataHandler.create(data=X, client=self.client) + self.X_handler = DistributedDataHandler.create(data=X, + client=self.client) self.datatype = self.X_handler.datatype self.n_cols = X.shape[1] return self @@ -162,13 +160,13 @@ def _query_models(self, n_neighbors, index_handler.calculate_parts_to_sizes(comms=comms) query_handler.calculate_parts_to_sizes(comms=comms) - idx_parts_to_ranks, idx_M = parts_to_ranks(self.client, - worker_info, - index_handler.gpu_futures) + idx_parts_to_ranks, _ = parts_to_ranks(self.client, + worker_info, + index_handler.gpu_futures) - query_parts_to_ranks, query_M = parts_to_ranks(self.client, - worker_info, - query_handler.gpu_futures) + query_parts_to_ranks, _ = parts_to_ranks(self.client, + worker_info, + query_handler.gpu_futures) """ Invoke kneighbors on Dask workers to perform distributed query From 7153f8bbdc334f69a613c75623d4fbfefdaecda7 Mon Sep 17 00:00:00 2001 From: divyegala Date: Tue, 7 Apr 2020 21:20:21 -0500 Subject: [PATCH 123/330] cherrypicking to clean history --- .gitignore | 1 + BUILD.md | 2 +- CHANGELOG.md | 5 + build.sh | 20 ++- ci/mg/build.sh | 40 +++--- conda/environments/cuml_dev_cuda10.0.yml | 3 +- conda/environments/cuml_dev_cuda10.1.yml | 3 +- conda/environments/cuml_dev_cuda10.2.yml | 3 +- cpp/README.md | 4 +- 
cpp/cmake/Dependencies.cmake | 11 +- cpp/cmake/FindClangFormat.cmake | 77 ----------- cpp/cmake/FindClangTidy.cmake | 26 ---- cpp/scripts/run-clang-format.py | 8 +- .../cuml/neighbors/nearest_neighbors_mg.pyx | 4 +- .../cuml/test/dask/test_nearest_neighbors.py | 5 +- python/cuml/test/test_arima.py | 2 +- python/cuml/test/test_umap.py | 1 + python/setup.py | 130 +++++++++++------- python/setuputils.py | 114 +++++++++------ 19 files changed, 219 insertions(+), 240 deletions(-) delete mode 100644 cpp/cmake/FindClangFormat.cmake delete mode 100644 cpp/cmake/FindClangTidy.cmake diff --git a/.gitignore b/.gitignore index 12712f7c1e..1f3a35d54e 100644 --- a/.gitignore +++ b/.gitignore @@ -19,6 +19,7 @@ cuml.egg-info/ dist/ python/cuml/**/*.cpp python/external_repositories +python/record.txt log .ipynb_checkpoints .DS_Store diff --git a/BUILD.md b/BUILD.md index dfcf9aecb1..4ee6cce79b 100644 --- a/BUILD.md +++ b/BUILD.md @@ -11,7 +11,7 @@ To install cuML from source, ensure the following dependencies are met: 5. Cython (>= 0.29) 6. gcc (>=5.4.0) 7. BLAS - Any BLAS compatible with cmake's [FindBLAS](https://cmake.org/cmake/help/v3.14/module/FindBLAS.html). Note that the blas has to be installed to the same folder system as cmake, for example if using conda installed cmake, the blas implementation should also be installed in the conda environment. -8. clang-format (= 8.0.0) - enforces uniform C++ coding style; required for developers. The RAPIDS conda channel provides a package (`conda install -c rapidsai libclang`). If not using conda, install using your OS package manager. +8. clang-format (= 8.0.1) - enforces uniform C++ coding style; required to build cuML from source. The packages `clang=8` and `clang-tools=8` from the conda-forge channel should be sufficient, if you are on conda. If not using conda, install the right version using your OS package manager. 9. NCCL (>=2.4) 10. UCX [optional] (>= 1.7) - enables point-to-point messaging in the cuML standard communicator. 
This is necessary for many multi-node multi-GPU cuML algorithms to function. diff --git a/CHANGELOG.md b/CHANGELOG.md index f73f932ab8..ea3d82bc19 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,8 @@ - PR #2031: Encapsulating UCX-py interactions in singleton - PR #2029: Add C++ ARIMA log-likelihood benchmark - PR #1981: Using CumlArray in kNN and DistributedDataHandler in dask kNN +- PR #1981: CumlArray and DistributedDataHandler refactor in NearestNeighbors +- PR #2016: Add capability to setup.py and build.sh to fully clean all cython build files and artifacts ## Bug Fixes - PR #1939: Fix syntax error in cuml.common.array @@ -28,10 +30,13 @@ - PR #1969: Update libcumlprims to 0.14 - PR #1973: Add missing mg files for setup.py --singlegpu flag - PR #1993: Set `umap_transform_reproducibility` tests to xfail +<<<<<<< HEAD - PR #2017: Fixing memory issue in weak cc prim - PR #2028: Skipping UMAP knn reproducibility tests until we figure out why its failing in CUDA 10.2 - PR #2024: Fixed cuda-memcheck errors with sample-without-replacement prim - PR #1540: prims: support for custom math-type used for computation inside adjusted rand index prim +======= +>>>>>>> 6d9c861... using DDH parts_to_sizes # cuML 0.13.0 (Date TBD) diff --git a/build.sh b/build.sh index c2c1a1c9d1..8b9d539c84 100755 --- a/build.sh +++ b/build.sh @@ -1,6 +1,6 @@ #!/bin/bash -# Copyright (c) 2019, NVIDIA CORPORATION. +# Copyright (c) 2019-2020, NVIDIA CORPORATION. # cuml build script @@ -18,7 +18,7 @@ ARGS=$* # script, and that this script resides in the repo dir! REPODIR=$(cd $(dirname $0); pwd) -VALIDARGS="clean libcuml cuml prims bench prims-bench -v -g -n --allgpuarch --singlegpu --nvtx --show_depr_warn -h --help" +VALIDARGS="clean libcuml cuml prims bench prims-bench -v -g -n --allgpuarch --singlegpu --nvtx --show_depr_warn -h --help --deep" HELP="$0 [ ...] [ ...] where is: clean - remove all existing build artifacts and configuration (start over) @@ -33,7 +33,7 @@ HELP="$0 [ ...] 
[ ...] -g - build for debug -n - no install step --allgpuarch - build for all supported GPU architectures - --singlegpu - Build cuml without multigpu support (multigpu requires libcumlprims) + --singlegpu - Build cuml without libcumlprims based multigpu algorithms. --nvtx - Enable nvtx for profiling support --show_depr_warn - show cmake deprecation warnings -h - print this text @@ -53,6 +53,7 @@ BUILD_ALL_GPU_ARCH=0 SINGLEGPU="" NVTX=OFF CLEAN=0 +DEEPCLEAN=0 BUILD_DISABLE_DEPRECATION_WARNING=ON # Set defaults for vars that may not have been defined externally @@ -114,10 +115,15 @@ if (( ${CLEAN} == 1 )); then # The find removes all contents but leaves the dirs, the rmdir # attempts to remove the dirs but can fail safely. for bd in ${BUILD_DIRS}; do - if [ -d ${bd} ]; then - find ${bd} -mindepth 1 -delete - rmdir ${bd} || true - fi + if [ -d ${bd} ]; then + find ${bd} -mindepth 1 -delete + rmdir ${bd} || true + fi + + cd ${REPODIR}/python + python setup.py clean --all + cd ${REPODIR} + done fi diff --git a/ci/mg/build.sh b/ci/mg/build.sh index adbefd22c6..2424f8e554 100644 --- a/ci/mg/build.sh +++ b/ci/mg/build.sh @@ -22,10 +22,10 @@ export PATH=/conda/bin:/usr/local/cuda/bin:$PATH export PARALLEL_LEVEL=4 export CUDA_REL=${CUDA_VERSION%.*} -# Set versions of packages needed to be grabbed -export CUDF_VERSION=0.8.* -export NVSTRINGS_VERSION=0.8.* -export RMM_VERSION=0.8.* +# Parse git describe +cd $WORKSPACE +export GIT_DESCRIBE_TAG=`git describe --tags` +export MINOR_VERSION=`echo $GIT_DESCRIBE_TAG | grep -o -E '([0-9]+\.[0-9]+)'` # Set home to the job's workspace export HOME=$WORKSPACE @@ -43,19 +43,25 @@ nvidia-smi logger "Activate conda env..." 
source activate gdf conda install -c conda-forge -c rapidsai -c rapidsai-nightly -c nvidia \ - cudf=${CUDF_VERSION} \ - rmm=${RMM_VERSION} \ - nvstrings=${NVSTRINGS_VERSION} \ - lapack cmake==3.14.3 \ - umap-learn \ - protobuf >=3.4.1,<4.0.0 \ - libclang \ - nccl>=2.4 \ - dask>=2.12.0 \ - distributed>=2.12.0 \ - dask-ml \ - dask-cudf \ - dask-cuda=0.9 + "cupy>=7,<8.0.0a0" \ + "cudatoolkit=${CUDA_REL}" \ + "cudf=${MINOR_VERSION}" \ + "rmm=${MINOR_VERSION}" \ + "nvstrings=${MINOR_VERSION}" \ + "libcumlprims=${MINOR_VERSION}" \ + "lapack" \ + "cmake==3.14.3" \ + "umap-learn" \ + "protobuf>=3.4.1,<4.0.0" \ + "nccl>=2.5" \ + "dask>=2.12.0" \ + "distributed>=2.12.0" \ + "dask-cudf=${MINOR_VERSION}" \ + "dask-cuda=${MINOR_VERSION}" \ + "ucx-py=${MINOR_VERSION}" \ + "statsmodels" \ + "xgboost====1.0.2dev.rapidsai0.13" \ + "lightgbm" logger "Check versions..." python --version diff --git a/conda/environments/cuml_dev_cuda10.0.yml b/conda/environments/cuml_dev_cuda10.0.yml index a620f1b192..b2532a0bf6 100644 --- a/conda/environments/cuml_dev_cuda10.0.yml +++ b/conda/environments/cuml_dev_cuda10.0.yml @@ -6,7 +6,8 @@ channels: - conda-forge dependencies: - cudatoolkit=10.0 -- libclang=8.0.0 +- clang=8.0.1 +- clang-tools=8.0.1 - cmake=3.14.5 - numba>=0.46 - cupy>=7,<8.0.0a0 diff --git a/conda/environments/cuml_dev_cuda10.1.yml b/conda/environments/cuml_dev_cuda10.1.yml index cd0b184e2b..e76f7bc2bd 100644 --- a/conda/environments/cuml_dev_cuda10.1.yml +++ b/conda/environments/cuml_dev_cuda10.1.yml @@ -6,7 +6,8 @@ channels: - conda-forge dependencies: - cudatoolkit=10.1 -- libclang=8.0.0 +- clang=8.0.1 +- clang-tools=8.0.1 - cmake=3.14.5 - numba>=0.46 - cupy>=7,<8.0.0a0 diff --git a/conda/environments/cuml_dev_cuda10.2.yml b/conda/environments/cuml_dev_cuda10.2.yml index ab3f3d0b11..d4e52225a1 100644 --- a/conda/environments/cuml_dev_cuda10.2.yml +++ b/conda/environments/cuml_dev_cuda10.2.yml @@ -6,7 +6,8 @@ channels: - conda-forge dependencies: - cudatoolkit=10.2 -- 
libclang=8.0.0 +- clang=8.0.1 +- clang-tools=8.0.1 - cmake=3.14.5 - numba>=0.46 - cupy>=7,<8.0.0a0 diff --git a/cpp/README.md b/cpp/README.md index 5f57e6cbb9..fc1e3de2a3 100644 --- a/cpp/README.md +++ b/cpp/README.md @@ -16,10 +16,10 @@ The `test` directory has subdirectories that reflect this distinction between th ### Dependencies 1. cmake (>= 3.14) -2. CUDA (>= 9.2) +2. CUDA (>= 10.0) 3. gcc (>=5.4.0) 4. BLAS - Any BLAS compatible with cmake's [FindBLAS](https://cmake.org/cmake/help/v3.14/module/FindBLAS.html). Note that the blas has to be installed to the same folder system as cmake, for example if using conda installed cmake, the blas implementation should also be installed in the conda environment. -5. clang-format (= 8.0.0) - enforces uniform C++ coding style; required to build cuML from source. The RAPIDS conda channel provides a package. If not using conda, install using your OS package manager. +5. clang-format (= 8.0.1) - enforces uniform C++ coding style; required to build cuML from source. The packages `clang=8` and `clang-tools=8` from the conda-forge channel should be sufficient, if you are on conda. If not using conda, install the right version using your OS package manager. 6. UCX with CUDA support [optional] (>=1.7) - enables point-to-point messaging in the cuML communicator. 
### Building cuML: diff --git a/cpp/cmake/Dependencies.cmake b/cpp/cmake/Dependencies.cmake index 11edb89228..6ce4b856ae 100644 --- a/cpp/cmake/Dependencies.cmake +++ b/cpp/cmake/Dependencies.cmake @@ -72,8 +72,9 @@ ExternalProject_Add(faiss --with-cuda-arch=${FAISS_GPU_ARCHS} -v PREFIX ${FAISS_DIR} - BUILD_COMMAND ${CMAKE_MAKE_PROGRAM} -j${PARALLEL_LEVEL} VERBOSE=1 - INSTALL_COMMAND ${CMAKE_MAKE_PROGRAM} -s install > /dev/null + BUILD_COMMAND make -j${PARALLEL_LEVEL} VERBOSE=1 + BUILD_BYPRODUCTS ${FAISS_DIR}/lib/libfaiss.a + INSTALL_COMMAND make -s install > /dev/null UPDATE_COMMAND "" BUILD_IN_SOURCE 1) ExternalProject_Get_Property(faiss install_dir) @@ -94,6 +95,9 @@ ExternalProject_Add(treelite -DCMAKE_INSTALL_PREFIX= -DENABLE_PROTOBUF=ON -DCMAKE_PREFIX_PATH=${CMAKE_PREFIX_PATH} + BUILD_BYPRODUCTS ${TREELITE_DIR}/lib/libtreelite.a + ${TREELITE_DIR}/lib/libdmlc.a + ${TREELITE_DIR}/lib/libtreelite_runtime.so UPDATE_COMMAND "" PATCH_COMMAND patch -p1 -N < ${CMAKE_CURRENT_SOURCE_DIR}/cmake/treelite_protobuf.patch || true) add_library(dmlclib STATIC IMPORTED) @@ -125,6 +129,8 @@ ExternalProject_Add(googletest CMAKE_ARGS -DCMAKE_INSTALL_PREFIX= -DBUILD_SHARED_LIBS=OFF -DCMAKE_INSTALL_LIBDIR=lib + BUILD_BYPRODUCTS ${GTEST_DIR}/lib/libgtest.a + ${GTEST_DIR}/lib/libgtest_main.a UPDATE_COMMAND "") add_library(gtestlib STATIC IMPORTED) add_library(gtest_mainlib STATIC IMPORTED) @@ -150,6 +156,7 @@ ExternalProject_Add(benchmark -DCMAKE_INSTALL_PREFIX= -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_LIBDIR=lib + BUILD_BYPRODUCTS ${GBENCH_DIR}/lib/libbenchmark.a UPDATE_COMMAND "") add_library(benchmarklib STATIC IMPORTED) add_dependencies(benchmarklib benchmark) diff --git a/cpp/cmake/FindClangFormat.cmake b/cpp/cmake/FindClangFormat.cmake deleted file mode 100644 index f522273a02..0000000000 --- a/cpp/cmake/FindClangFormat.cmake +++ /dev/null @@ -1,77 +0,0 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# Finds clang-format exe based on the PATH env variable -string(REPLACE ":" ";" EnvPath $ENV{PATH}) -find_program(ClangFormat_EXE - NAMES clang-format - PATHS EnvPath - DOC "path to clang-format exe") -find_program(ClangFormat_PY - NAMES run-clang-format.py - PATHS ${PROJECT_SOURCE_DIR}/scripts - DOC "path to run-clang-format python script") - -# Figure out the version of clang-format, if found -if(ClangFormat_EXE) - execute_process(COMMAND ${ClangFormat_EXE} --version - OUTPUT_VARIABLE __cf_version_out - OUTPUT_STRIP_TRAILING_WHITESPACE) - string(REGEX REPLACE - "^clang-format version ([0-9.-]+).*$" "\\1" - ClangFormat_VERSION_STRING - "${__cf_version_out}") -endif() - -include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(ClangFormat - REQUIRED_VARS ClangFormat_EXE ClangFormat_PY - VERSION_VAR ClangFormat_VERSION_STRING) - -include(CMakeParseArguments) - -set(ClangFormat_TARGET format) - -# clang formatting as a target in the final build stage -function(add_clang_format) - if(ClangFormat_FOUND) - set(options "") - set(oneValueArgs DSTDIR SRCDIR) - set(multiValueArgs "") - cmake_parse_arguments(cf "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) - # to flag violations - add_custom_target(${ClangFormat_TARGET} - ALL - COMMAND python - ${ClangFormat_PY} - -dstdir ${cf_DSTDIR} - -exe ${ClangFormat_EXE} - -onlyChangedFiles - COMMENT "Run clang-format on the cpp source files" - 
WORKING_DIRECTORY ${cf_SRCDIR}) - # to fix the flagged violations (only to be run locally!) - add_custom_target(fix-${ClangFormat_TARGET} - COMMAND python - ${ClangFormat_PY} - -dstdir ${cf_DSTDIR} - -exe ${ClangFormat_EXE} - -onlyChangedFiles - -inplace - COMMENT "Run the inplace fix for clang-format flagged violations" - WORKING_DIRECTORY ${cf_SRCDIR}) - else() - message("add_clang_format: clang-format exe not found") - endif() -endfunction(add_clang_format) diff --git a/cpp/cmake/FindClangTidy.cmake b/cpp/cmake/FindClangTidy.cmake deleted file mode 100644 index 8cbcc4238f..0000000000 --- a/cpp/cmake/FindClangTidy.cmake +++ /dev/null @@ -1,26 +0,0 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# Finds clang-tidy exe based on the PATH env variable -string(REPLACE ":" ";" EnvPath $ENV{PATH}) -find_program(ClangTidy_EXE - NAMES clang-tidy - PATHS EnvPath - DOC "path to clang-tidy exe") -include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(ClangTidy DEFAULT_MSG - ClangTidy_EXE) - -# TODO: add a clang_tidy dependency on the existing targets diff --git a/cpp/scripts/run-clang-format.py b/cpp/scripts/run-clang-format.py index 2957933136..4aec7948a6 100755 --- a/cpp/scripts/run-clang-format.py +++ b/cpp/scripts/run-clang-format.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. +# Copyright (c) 2019-2020, NVIDIA CORPORATION. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -22,6 +22,7 @@ import tempfile +EXPECTED_VERSION = "8.0.1" VERSION_REGEX = re.compile(r"clang-format version ([0-9.]+)") # NOTE: populate this list with more top-level dirs as we add more of them to # to the cuml repo @@ -67,8 +68,9 @@ def parse_args(): if version is None: raise Exception("Failed to figure out clang-format version!") version = version.group(1) - if version != "8.0.0": - raise Exception("clang-format exe must be v8.0.0 found '%s'" % version) + if version != EXPECTED_VERSION: + raise Exception("clang-format exe must be v%s found '%s'" % \ + (EXPECTED_VERSION, version)) if len(args.dirs) == 0: args.dirs = DEFAULT_DIRS return args diff --git a/python/cuml/neighbors/nearest_neighbors_mg.pyx b/python/cuml/neighbors/nearest_neighbors_mg.pyx index 0a8e22c77b..89c6a3fdda 100644 --- a/python/cuml/neighbors/nearest_neighbors_mg.pyx +++ b/python/cuml/neighbors/nearest_neighbors_mg.pyx @@ -203,8 +203,8 @@ def _build_part_inputs(cuda_arr_ifaces, "data": input_ptr, "shape": (n_rows, n_cols)}) - for rankSize in parts_to_ranks: - rank, size = rankSize + for idx, rankToSize in enumerate(parts_to_ranks): + rank, size = rankToSize rsp = malloc(sizeof(RankSizePair)) rsp.rank = rank rsp.size = size diff --git a/python/cuml/test/dask/test_nearest_neighbors.py b/python/cuml/test/dask/test_nearest_neighbors.py index c906364c67..e0eda813aa 100644 --- a/python/cuml/test/dask/test_nearest_neighbors.py +++ b/python/cuml/test/dask/test_nearest_neighbors.py @@ -30,9 +30,6 @@ from sklearn.neighbors import KNeighborsClassifier -from cuml.neighbors.nearest_neighbors_mg import \ - NearestNeighborsMG as cumlNN - from cuml.test.utils import array_equal @@ -200,6 +197,8 @@ def test_default_n_neighbors(cluster): try: from cuml.dask.neighbors import NearestNeighbors as daskNN + from cuml.neighbors.nearest_neighbors_mg import \ + NearestNeighborsMG as 
cumlNN from sklearn.datasets import make_blobs diff --git a/python/cuml/test/test_arima.py b/python/cuml/test/test_arima.py index 9bb9fd33ab..3b8fef0d7b 100644 --- a/python/cuml/test/test_arima.py +++ b/python/cuml/test/test_arima.py @@ -168,7 +168,7 @@ (1, 0, 1, 1, 1, 1, 4, 0): test_101_111_4, (1, 1, 1, 2, 0, 0, 4, 0): test_111_200_4, (1, 1, 2, 0, 1, 2, 4, 0): test_112_012_4, - (1, 1, 1, 1, 1, 1, 12, 0): test_111_111_12, + # (1, 1, 1, 1, 1, 1, 12, 0): test_111_111_12, } # Dictionary for lazy-loading of datasets diff --git a/python/cuml/test/test_umap.py b/python/cuml/test/test_umap.py index 758b696a1e..cc8858303f 100644 --- a/python/cuml/test/test_umap.py +++ b/python/cuml/test/test_umap.py @@ -314,6 +314,7 @@ def get_embedding(n_components, random_state): @pytest.mark.parametrize('n_components', [2, 25]) @pytest.mark.parametrize('random_state', [None, 8, np.random.RandomState(42)]) +@pytest.mark.xfail(reason="test intermittently fails") def test_umap_transform_reproducibility(n_components, random_state): n_samples = 5000 diff --git a/python/setup.py b/python/setup.py index 0196e12a7a..02143fe8c4 100644 --- a/python/setup.py +++ b/python/setup.py @@ -14,31 +14,35 @@ # limitations under the License. 
# -from Cython.Build import cythonize from distutils.sysconfig import get_python_lib -from setuptools import setup, find_packages +from pathlib import Path +from setuptools import find_packages +from setuptools import setup from setuptools.extension import Extension +from setuputils import clean_folder from setuputils import get_submodule_dependencies -try: - from Cython.Distutils.build_ext import new_build_ext as build_ext -except ImportError: - from setuptools.command.build_ext import build_ext - +import numpy import os -import subprocess +import shutil import sys import sysconfig import versioneer import warnings -import numpy + +try: + if "--singlegpu" in sys.argv: + from Cython.Build import cythonize + else: + from Cython.Distutils.build_ext import new_build_ext as build_ext +except ImportError: + from setuptools.command.build_ext import build_ext install_requires = [ 'numba', 'cython' ] - ############################################################################## # - Dependencies include and lib folder setup -------------------------------- @@ -50,64 +54,76 @@ cuda_include_dir = os.path.join(CUDA_HOME, "include") cuda_lib_dir = os.path.join(CUDA_HOME, "lib64") +############################################################################## +# - Clean target ------------------------------------------------------------- + +if "clean" in sys.argv: + print("Cleaning all Python and Cython build artifacts...") + + treelite_path = "" + libcuml_path = "" + + try: + setup_file_path = str(Path(__file__).parent.absolute()) + shutil.rmtree(setup_file_path + '/build') + shutil.rmtree(setup_file_path + '/.pytest_cache', ignore_errors=True) + shutil.rmtree(setup_file_path + '/external_repositories', + ignore_errors=True) + shutil.rmtree(setup_file_path + '/cuml.egg-info', ignore_errors=True) + shutil.rmtree(setup_file_path + '/__pycache__', ignore_errors=True) + + clean_folder(setup_file_path + '/cuml') + + except IOError: + pass + + # need to terminate script so 
cythonizing doesn't get triggered after + # cleanup unintendedly + sys.argv.remove("clean") + sys.argv.remove("--all") + + if len(sys.argv) == 1: + sys.exit(0) ############################################################################## -# - Subrepo checking and cloning --------------------------------------------- +# - Cloning dependencies if needed ------------------------------------------- subrepos = [ - 'cub', - 'cutlass', - 'faiss', 'treelite' ] # We check if there is a libcuml++ build folder, by default in cpp/build # or in CUML_BUILD_PATH env variable. Otherwise setup.py will clone the -# dependencies defined in cpp/CMakeListst.txt -if "clean" not in sys.argv: - if os.environ.get('CUML_BUILD_PATH', False): - libcuml_path = '../' + os.environ.get('CUML_BUILD_PATH') - else: - libcuml_path = '../cpp/build/' - - found_cmake_repos = get_submodule_dependencies(subrepos, - libcuml_path=libcuml_path) +# dependencies defined in cpp/cmake/Dependencies.cmake +if os.environ.get('CUML_BUILD_PATH', False): + libcuml_path = '../' + os.environ.get('CUML_BUILD_PATH') +else: + libcuml_path = '../cpp/build/' - if found_cmake_repos: - treelite_path = os.path.join(libcuml_path, - 'treelite/src/treelite/include') - faiss_path = os.path.join(libcuml_path, 'faiss/src/') - cub_path = os.path.join(libcuml_path, 'cub/src/cub') - cutlass_path = os.path.join(libcuml_path, 'cutlass/src/cutlass') - else: - # faiss requires the include to be to the parent of the root of - # their repo instead of the full path like the others - faiss_path = 'external_repositories/' - treelite_path = 'external_repositories/treelite/include' - cub_path = 'external_repositories/cub' - cutlass_path = 'external_repositories/cutlass' +found_cmake_repos = get_submodule_dependencies(subrepos, + libcuml_path=libcuml_path) +if found_cmake_repos: + treelite_path = os.path.join(libcuml_path, + 'treelite/src/treelite/include') else: - subprocess.check_call(['rm', '-rf', 'external_repositories']) - treelite_path = "" 
- faiss_path = "" - cub_path = "" - cutlass_path = "" + treelite_path = 'external_repositories/treelite/include' + ############################################################################## # - Cython extensions build and parameters ----------------------------------- +# cumlcomms and nccl are still needed for multigpu algos not based +# on libcumlprims libs = ['cuda', 'cuml++', + 'cumlcomms', + 'nccl', 'rmm'] include_dirs = ['../cpp/src', '../cpp/include', - '../cpp/external', '../cpp/src_prims', - cutlass_path, - cub_path, - faiss_path, treelite_path, '../cpp/comms/std/src', '../cpp/comms/std/include', @@ -132,13 +148,9 @@ exc_list.append('cuml/linear_model/ridge_mg.pyx') exc_list.append('cuml/linear_model/linear_regression_mg.pyx') exc_list.append('cuml/neighbors/nearest_neighbors_mg.pyx') - sys.argv.remove('--singlegpu') + else: libs.append('cumlprims') - # ucx/ucx-py related functionality available in version 0.12+ - # libs.append("ucp") - libs.append('cumlcomms') - libs.append('nccl') sys_include = os.path.dirname(sysconfig.get_path("include")) include_dirs.append("%s/cumlprims" % sys_include) @@ -159,6 +171,21 @@ extra_compile_args=['-std=c++11']) ] +for e in extensions: + # TODO: this exclude is not working, need to research way to properly + # exclude files for parallel build. 
See issue + # https://github.com/rapidsai/cuml/issues/2037 + # e.exclude = exc_list + e.cython_directives = dict( + profile=False, language_level=3, embedsignature=True + ) + +if "--singlegpu" in sys.argv: + print("Full cythonization in parallel is not supported for singlegpu " + + "target for now.") + extensions = cythonize(extensions, + exclude=exc_list) + sys.argv.remove('--singlegpu') ############################################################################## # - Python package generation ------------------------------------------------ @@ -174,8 +201,7 @@ ], author="NVIDIA Corporation", setup_requires=['cython'], - ext_modules=cythonize(extensions, - exclude=exc_list), + ext_modules=extensions, packages=find_packages(include=['cuml', 'cuml.*']), install_requires=install_requires, license="Apache", diff --git a/python/setuputils.py b/python/setuputils.py index 171efe579e..24ee9a9e08 100644 --- a/python/setuputils.py +++ b/python/setuputils.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2018, NVIDIA CORPORATION. +# Copyright (c) 2018-2020, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,12 +14,38 @@ # limitations under the License. # +import glob import os import re +import shutil import subprocess import warnings +def clean_folder(path): + """ + Function to clean all Cython and Python artifacts and cache folders. It + clean the folder as well as its direct children recursively. + + Parameters + ---------- + path : String + Path to the folder to be cleaned. 
+ """ + shutil.rmtree(path + '/__pycache__', ignore_errors=True) + + folders = glob.glob(path + '/*/') + for folder in folders: + shutil.rmtree(folder + '/__pycache__', ignore_errors=True) + + clean_folder(folder) + + cython_exts = glob.glob(folder + '/*.cpp') + cython_exts.extend(glob.glob(folder + '/*.cpython*')) + for file in cython_exts: + os.remove(file) + + def clone_repo(name, GIT_REPOSITORY, GIT_TAG, force_clone=False): """ Function to clone repos if they have not been cloned already. @@ -27,17 +53,17 @@ def clone_repo(name, GIT_REPOSITORY, GIT_TAG, force_clone=False): in spite of not being very pythonic. Parameters - ---------- - name : String - Name of the repo to be cloned - GIT_REPOSITORY : String - URL of the repo to be cloned - GIT_TAG : String - commit hash or git hash to be cloned. Accepts anything that - `git checkout` accepts - force_clone : Boolean - Set to True to ignore already cloned repositories in - external_repositories and clone + ---------- + name : String + Name of the repo to be cloned + GIT_REPOSITORY : String + URL of the repo to be cloned + GIT_TAG : String + commit hash or git hash to be cloned. Accepts anything that + `git checkout` accepts + force_clone : Boolean + Set to True to ignore already cloned repositories in + external_repositories and clone """ @@ -66,23 +92,23 @@ def get_repo_cmake_info(names, file_path): Function to find information about submodules from cpp/CMakeLists file Parameters - ---------- - name : List of Strings - List containing the names of the repos to be cloned. Must match - the names of the cmake git clone instruction - `ExternalProject_Add(name` - file_path : String - Relative path of the location of the CMakeLists.txt (or the cmake - module which contains ExternalProject_Add definitions) to extract - the information. + ---------- + name : List of Strings + List containing the names of the repos to be cloned. 
Must match + the names of the cmake git clone instruction + `ExternalProject_Add(name` + file_path : String + Relative path of the location of the CMakeLists.txt (or the cmake + module which contains ExternalProject_Add definitions) to extract + the information. Returns - ------- - results : dictionary - Dictionary where results[name] contains an array, - where results[name][0] is the url of the repo and - repo_info[repo][1] is the tag/commit hash to be cloned as - specified by cmake. + ------- + results : dictionary + Dictionary where results[name] contains an array, + where results[name][0] is the url of the repo and + repo_info[repo][1] is the tag/commit hash to be cloned as + specified by cmake. """ with open(file_path) as f: @@ -113,25 +139,25 @@ def get_submodule_dependencies(repos, repos needed to build the cuML Python package. Parameters - ---------- - repos : List of Strings - List containing the names of the repos to be cloned. Must match - the names of the cmake git clone instruction - `ExternalProject_Add(name` - file_path : String - Relative path of the location of the CMakeLists.txt (or the cmake - module which contains ExternalProject_Add definitions) to extract - the information. By default it will look in the standard location - `cuml_repo_root/cpp` - libcuml_path : String - Relative location of the build folder to look if repositories - already exist + ---------- + repos : List of Strings + List containing the names of the repos to be cloned. Must match + the names of the cmake git clone instruction + `ExternalProject_Add(name` + file_path : String + Relative path of the location of the CMakeLists.txt (or the cmake + module which contains ExternalProject_Add definitions) to extract + the information. 
By default it will look in the standard location + `cuml_repo_root/cpp` + libcuml_path : String + Relative location of the build folder to look if repositories + already exist Returns - ------- - result : boolean - True if repos were found in libcuml cpp build folder, False - if they were not found. + ------- + result : boolean + True if repos were found in libcuml cpp build folder, False + if they were not found. """ repo_info = get_repo_cmake_info(repos, file_path) From 0cbb4546bc00be359f15fe08b0be18d2b4ee8bb5 Mon Sep 17 00:00:00 2001 From: divyegala Date: Fri, 10 Apr 2020 13:31:08 -0500 Subject: [PATCH 124/330] fixing changelog --- CHANGELOG.md | 3 --- 1 file changed, 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ea3d82bc19..b7c7011949 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -30,13 +30,10 @@ - PR #1969: Update libcumlprims to 0.14 - PR #1973: Add missing mg files for setup.py --singlegpu flag - PR #1993: Set `umap_transform_reproducibility` tests to xfail -<<<<<<< HEAD - PR #2017: Fixing memory issue in weak cc prim - PR #2028: Skipping UMAP knn reproducibility tests until we figure out why its failing in CUDA 10.2 - PR #2024: Fixed cuda-memcheck errors with sample-without-replacement prim - PR #1540: prims: support for custom math-type used for computation inside adjusted rand index prim -======= ->>>>>>> 6d9c861... 
using DDH parts_to_sizes # cuML 0.13.0 (Date TBD) From 90e8455ea91af78040b94a8d40328cba7012ec20 Mon Sep 17 00:00:00 2001 From: divyegala Date: Fri, 10 Apr 2020 13:32:01 -0500 Subject: [PATCH 125/330] more changelog fix --- CHANGELOG.md | 1 - 1 file changed, 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b7c7011949..e17994807b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,7 +20,6 @@ - PR #2031: Encapsulating UCX-py interactions in singleton - PR #2029: Add C++ ARIMA log-likelihood benchmark - PR #1981: Using CumlArray in kNN and DistributedDataHandler in dask kNN -- PR #1981: CumlArray and DistributedDataHandler refactor in NearestNeighbors - PR #2016: Add capability to setup.py and build.sh to fully clean all cython build files and artifacts ## Bug Fixes From 286c53ead9807106bd3213ad4d22c7cb464f1787 Mon Sep 17 00:00:00 2001 From: Simon Andersen Date: Fri, 10 Apr 2020 15:18:32 -0500 Subject: [PATCH 126/330] Fix inverse_transform index --- python/cuml/preprocessing/encoders.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cuml/preprocessing/encoders.py b/python/cuml/preprocessing/encoders.py index 48ee76f6ab..b69ea7a181 100644 --- a/python/cuml/preprocessing/encoders.py +++ b/python/cuml/preprocessing/encoders.py @@ -292,7 +292,7 @@ def inverse_transform(self, X): enc_size = len(cats) x_feature = X[:, j:j + enc_size] idx = cp.argmax(x_feature, axis=1) - inv = Series(cats[idx]) + inv = Series(cats[idx]).reset_index(drop=True) if self.handle_unknown == 'ignore': not_null_idx = x_feature.any(axis=1) From a026baa03b29d9323f47ccbdf356936c13878ca9 Mon Sep 17 00:00:00 2001 From: Simon Andersen Date: Fri, 10 Apr 2020 16:02:48 -0500 Subject: [PATCH 127/330] Transform now directly fill the entire ohe matrix, removing the use for concat --- python/cuml/preprocessing/encoders.py | 62 ++++++++++++------------ python/cuml/test/test_one_hot_encoder.py | 4 +- 2 files changed, 33 insertions(+), 33 deletions(-) diff --git 
a/python/cuml/preprocessing/encoders.py b/python/cuml/preprocessing/encoders.py index b69ea7a181..17c1d297eb 100644 --- a/python/cuml/preprocessing/encoders.py +++ b/python/cuml/preprocessing/encoders.py @@ -132,10 +132,6 @@ def _compute_drop_idx(self): if len(self.drop[feature]) != 1: msg = ("Trying to drop multiple values for feature {}, " "this is not supported.").format(feature) - # Dropping multiple values actually works except in inverse - # transform where there is no way to know which categories - # where present before one hot encoding if multiples - # categories where dropped. raise ValueError(msg) cats = self._encoders[feature].classes_ if not self.drop[feature].isin(cats).all(): @@ -212,28 +208,6 @@ def fit_transform(self, X): """ return self.fit(X).transform(X) - @with_cupy_rmm - def _one_hot_encoding(self, feature, X): - encoder = self._encoders[feature] - - col_idx = encoder.transform(X) - col_idx = cp.asarray(col_idx.to_gpu_array(fillna="pandas")) - - ohe = cp.zeros((len(X), len(encoder.classes_)), dtype=self.dtype) - # Filter out rows with null values - idx_to_keep = col_idx > -1 - row_idx = cp.arange(len(ohe))[idx_to_keep] - col_idx = col_idx[idx_to_keep] - ohe[row_idx, col_idx] = 1 - - if self.drop_idx_ is not None: - drop_idx = self.drop_idx_[feature] - mask = cp.ones((ohe.shape[1]), dtype=cp.bool) - mask[drop_idx] = False - ohe = ohe[:, mask] - - return ohe - @with_cupy_rmm def transform(self, X): """ @@ -248,12 +222,38 @@ def transform(self, X): Transformed input. 
""" self._check_is_fitted() - onehots = [self._one_hot_encoding(feature, X[feature]) - for feature in X.columns] - onehots = cp.concatenate(onehots, axis=1) + + nb_categories = sum(len(e.classes_) for e in self._encoders.values()) + ohe = cp.zeros((len(X), nb_categories), dtype=self.dtype) + + j = 0 + for feature in X.columns: + encoder = self._encoders[feature] + col_idx = encoder.transform(X[feature]) + col_idx = cp.asarray(col_idx.to_gpu_array(fillna="pandas")) + idx_to_keep = col_idx > -1 + + # increase indices to take previous features into account + col_idx += j + + # Filter out rows with null values + row_idx = cp.arange(len(ohe))[idx_to_keep] + col_idx = col_idx[idx_to_keep] + + ohe[row_idx, col_idx] = 1 + + if self.drop_idx_ is not None: + drop_idx = self.drop_idx_[feature] + j + mask = cp.ones(ohe.shape[1], dtype=cp.bool) + mask[drop_idx] = False + ohe = ohe[:, mask] + j -= 1 # account for dropped category in current cats number + j += len(encoder.classes_) + if self.sparse: - onehots = cp.sparse.csr_matrix(onehots) - return onehots + ohe = cp.sparse.csr_matrix(ohe) + + return ohe @with_cupy_rmm def inverse_transform(self, X): diff --git a/python/cuml/test/test_one_hot_encoder.py b/python/cuml/test/test_one_hot_encoder.py index 77ea00ecd9..f35f70883a 100644 --- a/python/cuml/test/test_one_hot_encoder.py +++ b/python/cuml/test/test_one_hot_encoder.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
import pytest -from cudf import DataFrame, Series +from cudf import DataFrame from cuml.preprocessing import OneHotEncoder import cupy as cp @@ -58,7 +58,7 @@ def test_onehot_vs_skonehot(): @pytest.mark.parametrize('drop', [None, 'first', - {'g': Series('F'), 'i': Series(3)}]) + {'g': 'F', 'i': 3}]) def test_onehot_inverse_transform(drop): X = DataFrame({'g': ['M', 'F', 'F'], 'i': [1, 3, 2]}) From d2319a7afb764e481d1b855f3ce4d1fffd30d072 Mon Sep 17 00:00:00 2001 From: Simon Andersen Date: Fri, 10 Apr 2020 19:04:13 -0500 Subject: [PATCH 128/330] Add sparse support to OneHotEncoder --- python/cuml/preprocessing/encoders.py | 42 +++++++++++++++--------- python/cuml/test/test_one_hot_encoder.py | 25 ++++++++++---- 2 files changed, 44 insertions(+), 23 deletions(-) diff --git a/python/cuml/preprocessing/encoders.py b/python/cuml/preprocessing/encoders.py index 17c1d297eb..0db7698071 100644 --- a/python/cuml/preprocessing/encoders.py +++ b/python/cuml/preprocessing/encoders.py @@ -77,7 +77,7 @@ class OneHotEncoder: be dropped for each feature. None if all the transformed features will be retained. 
""" - def __init__(self, categories='auto', drop=None, sparse=False, + def __init__(self, categories='auto', drop=None, sparse=True, dtype=np.float, handle_unknown='error'): self.categories = categories self.sparse = sparse @@ -88,9 +88,6 @@ def __init__(self, categories='auto', drop=None, sparse=False, self.drop_idx_ = None self._features = None self._encoders = None - if sparse: - raise ValueError('Sparse matrix are not fully supported by cupy ' - 'yet, causing incorrect values') if sparse and np.dtype(dtype) not in ['f', 'd', 'F', 'D']: raise ValueError('Only float32, float64, complex64 and complex128 ' 'are supported when using sparse') @@ -223,9 +220,7 @@ def transform(self, X): """ self._check_is_fitted() - nb_categories = sum(len(e.classes_) for e in self._encoders.values()) - ohe = cp.zeros((len(X), nb_categories), dtype=self.dtype) - + cols, rows = list(), list() j = 0 for feature in X.columns: encoder = self._encoders[feature] @@ -237,21 +232,32 @@ def transform(self, X): col_idx += j # Filter out rows with null values - row_idx = cp.arange(len(ohe))[idx_to_keep] + row_idx = cp.arange(len(X))[idx_to_keep] col_idx = col_idx[idx_to_keep] - ohe[row_idx, col_idx] = 1 - if self.drop_idx_ is not None: drop_idx = self.drop_idx_[feature] + j - mask = cp.ones(ohe.shape[1], dtype=cp.bool) - mask[drop_idx] = False - ohe = ohe[:, mask] - j -= 1 # account for dropped category in current cats number + mask = cp.ones(col_idx.shape, dtype=cp.bool) + mask[col_idx == drop_idx] = False + col_idx = col_idx[mask] + row_idx = row_idx[mask] + # account for dropped category in indices + col_idx[col_idx > drop_idx] -= 1 + # account for dropped category in current cats number + j -= 1 j += len(encoder.classes_) + cols.append(col_idx) + rows.append(row_idx) + + cols = cp.concatenate(cols) + rows = cp.concatenate(rows) + val = cp.ones(rows.shape[0], dtype=self.dtype) + ohe = cp.sparse.coo_matrix((val, (rows, cols)), + shape=(len(X), j), + dtype=self.dtype) - if self.sparse: - ohe = 
cp.sparse.csr_matrix(ohe) + if not self.sparse: + ohe = ohe.toarray() return ohe @@ -272,6 +278,10 @@ def inverse_transform(self, X): """ self._check_is_fitted() if cp.sparse.issparse(X): + # cupy.sparse 7.x does not support argmax, when we upgrade cupy to + # 8.x, we should add a condition in the + # if close: `and cp.sparse.issparsecsc(X)` + # and change the following line by `X = X.tocsc()` X = X.toarray() result = DataFrame(columns=self._encoders.keys()) j = 0 diff --git a/python/cuml/test/test_one_hot_encoder.py b/python/cuml/test/test_one_hot_encoder.py index f35f70883a..dbcb730a41 100644 --- a/python/cuml/test/test_one_hot_encoder.py +++ b/python/cuml/test/test_one_hot_encoder.py @@ -47,13 +47,13 @@ def test_onehot_vs_skonehot(): X = DataFrame({'gender': ['Male', 'Female', 'Female'], 'int': [1, 3, 2]}) skX = _from_df_to_array(X) - enc = OneHotEncoder(sparse=False) - skohe = SkOneHotEncoder(sparse=False) + enc = OneHotEncoder(sparse=True) + skohe = SkOneHotEncoder(sparse=True) ohe = enc.fit_transform(X) ref = skohe.fit_transform(skX) - cp.testing.assert_array_equal(ohe, ref) + cp.testing.assert_array_equal(ohe.toarray(), ref.toarray()) @pytest.mark.parametrize('drop', [None, @@ -124,11 +124,8 @@ def test_onehot_inverse_transform_handle_unknown(): @pytest.mark.parametrize('drop', [None, 'first']) @pytest.mark.parametrize('sparse', [True, False], ids=['sparse', 'dense']) -@pytest.mark.parametrize("n_samples", [10, 10000, 50000, stress_param(250000)]) +@pytest.mark.parametrize("n_samples", [10, 1000, 20000, stress_param(250000)]) def test_onehot_random_inputs(drop, sparse, n_samples): - if sparse: - pytest.xfail("Sparse arrays are not fully supported by cupy.") - df, ary = _generate_inputs_from_categories(n_samples=n_samples) enc = OneHotEncoder(sparse=sparse, drop=drop) @@ -195,3 +192,17 @@ def test_onehot_get_categories(): for i in range(len(ref)): np.testing.assert_array_equal(ref[i], cats[i]) + + +def test_onehot_sparse_drop(): + X = DataFrame({'g': ['M', 
'F', 'F'], 'i': [1, 3, 2], 'l': [5, 5, 6]}) + drop = {'g': 'F', 'i': 3, 'l': 6} + + ary = _from_df_to_array(X) + drop_ary = ['F', 3, 6] + + enc = OneHotEncoder(sparse=True, drop=drop) + sk_enc = SkOneHotEncoder(sparse=True, drop=drop_ary) + ohe = enc.fit_transform(X) + ref = sk_enc.fit_transform(ary) + cp.testing.assert_array_equal(ohe.toarray(), ref.toarray()) From e80b11fab54fe0a3bf0b33c5fccd69fc03829d1d Mon Sep 17 00:00:00 2001 From: divyegala Date: Sat, 11 Apr 2020 00:01:17 -0500 Subject: [PATCH 129/330] SG classification --- python/cuml/datasets/classification.py | 242 +++++++++++++++++++++++++ python/cuml/datasets/utils.py | 31 ++++ 2 files changed, 273 insertions(+) create mode 100644 python/cuml/datasets/classification.py create mode 100644 python/cuml/datasets/utils.py diff --git a/python/cuml/datasets/classification.py b/python/cuml/datasets/classification.py new file mode 100644 index 0000000000..37da62581e --- /dev/null +++ b/python/cuml/datasets/classification.py @@ -0,0 +1,242 @@ +# Copyright (c) 2020, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from sklearn.utils.random import sample_without_replacement +from cuml.datasets.utils import _create_rs_generator +from cuml.utils import with_cupy_rmm + +import cupy as cp +import numpy as np + +from time import sleep + +def _generate_hypercube(samples, dimensions, rng): + """Returns distinct binary samples of length dimensions + """ + if dimensions > 30: + return np.hstack([rng.randint(2, size=(samples, dimensions - 30)), + _generate_hypercube(samples, 30, rng)]) + out = np.random.choice(2 ** dimensions, samples, + replace=False).astype(dtype='>u4', copy=False) + out = np.unpackbits(out.view('>u1')).reshape((-1, 32))[:, -dimensions:] + return out + + +@with_cupy_rmm +def make_classification(n_samples=100, n_features=20, n_informative=2, + n_redundant=2, n_repeated=0, n_classes=2, + n_clusters_per_class=2, weights=None, flip_y=0.01, + class_sep=1.0, hypercube=True, shift=0.0, scale=1.0, + shuffle=True, random_state=None, order='F', + dtype='float32'): + """Generate a random n-class classification problem. + This initially creates clusters of points normally distributed (std=1) + about vertices of an ``n_informative``-dimensional hypercube with sides of + length ``2*class_sep`` and assigns an equal number of clusters to each + class. It introduces interdependence between these features and adds + various types of further noise to the data. + Without shuffling, ``X`` horizontally stacks features in the following + order: the primary ``n_informative`` features, followed by ``n_redundant`` + linear combinations of the informative features, followed by ``n_repeated`` + duplicates, drawn randomly with replacement from the informative and + redundant features. The remaining features are filled with random noise. + Thus, without shuffling, all useful features are contained in the columns + ``X[:, :n_informative + n_redundant + n_repeated]``. + Read more in the :ref:`User Guide `. 
+ Parameters + ---------- + n_samples : int, optional (default=100) + The number of samples. + n_features : int, optional (default=20) + The total number of features. These comprise ``n_informative`` + informative features, ``n_redundant`` redundant features, + ``n_repeated`` duplicated features and + ``n_features-n_informative-n_redundant-n_repeated`` useless features + drawn at random. + n_informative : int, optional (default=2) + The number of informative features. Each class is composed of a number + of gaussian clusters each located around the vertices of a hypercube + in a subspace of dimension ``n_informative``. For each cluster, + informative features are drawn independently from N(0, 1) and then + randomly linearly combined within each cluster in order to add + covariance. The clusters are then placed on the vertices of the + hypercube. + n_redundant : int, optional (default=2) + The number of redundant features. These features are generated as + random linear combinations of the informative features. + n_repeated : int, optional (default=0) + The number of duplicated features, drawn randomly from the informative + and the redundant features. + n_classes : int, optional (default=2) + The number of classes (or labels) of the classification problem. + n_clusters_per_class : int, optional (default=2) + The number of clusters per class. + weights : array-like of shape (n_classes,) or (n_classes - 1,),\ + (default=None) + The proportions of samples assigned to each class. If None, then + classes are balanced. Note that if ``len(weights) == n_classes - 1``, + then the last class weight is automatically inferred. + More than ``n_samples`` samples may be returned if the sum of + ``weights`` exceeds 1. + flip_y : float, optional (default=0.01) + The fraction of samples whose class is assigned randomly. Larger + values introduce noise in the labels and make the classification + task harder. 
+ class_sep : float, optional (default=1.0) + The factor multiplying the hypercube size. Larger values spread + out the clusters/classes and make the classification task easier. + hypercube : boolean, optional (default=True) + If True, the clusters are put on the vertices of a hypercube. If + False, the clusters are put on the vertices of a random polytope. + shift : float, array of shape [n_features] or None, optional (default=0.0) + Shift features by the specified value. If None, then features + are shifted by a random value drawn in [-class_sep, class_sep]. + scale : float, array of shape [n_features] or None, optional (default=1.0) + Multiply features by the specified value. If None, then features + are scaled by a random value drawn in [1, 100]. Note that scaling + happens after shifting. + shuffle : boolean, optional (default=True) + Shuffle the samples and the features. + random_state : int, RandomState instance or None (default) + Determines random number generation for dataset creation. Pass an int + for reproducible output across multiple function calls. + See :term:`Glossary `. + order: str, optional (default='F') + The order of the generated samples + dtype : str, optional (default='float32') + Dtype of the generated samples + Returns + ------- + X : device array of shape [n_samples, n_features] + The generated samples. + y : device array of shape [n_samples] + The integer labels for class membership of each sample. + Notes + ----- + The algorithm is adapted from Guyon [1] and was designed to generate + the "Madelon" dataset. + References + ---------- + .. [1] I. Guyon, "Design of experiments for the NIPS 2003 variable + selection benchmark", 2003. 
+ """ + generator = _create_rs_generator(random_state) + + # Count features, clusters and samples + if n_informative + n_redundant + n_repeated > n_features: + raise ValueError("Number of informative, redundant and repeated " + "features must sum to less than the number of total" + " features") + # Use log2 to avoid overflow errors + if n_informative < np.log2(n_classes * n_clusters_per_class): + msg = "n_classes({}) * n_clusters_per_class({}) must be" + msg += " smaller or equal 2**n_informative({})={}" + raise ValueError(msg.format(n_classes, n_clusters_per_class, + n_informative, 2**n_informative)) + + if weights is not None: + if len(weights) not in [n_classes, n_classes - 1]: + raise ValueError("Weights specified but incompatible with number " + "of classes.") + if len(weights) == n_classes - 1: + if isinstance(weights, list): + weights = weights + [1.0 - sum(weights)] + else: + weights = np.resize(weights, n_classes) + weights[-1] = 1.0 - sum(weights[:-1]) + else: + weights = [1.0 / n_classes] * n_classes + + n_useless = n_features - n_informative - n_redundant - n_repeated + n_clusters = n_classes * n_clusters_per_class + + # Distribute samples among clusters by weight + n_samples_per_cluster = [ + int(n_samples * weights[k % n_classes] / n_clusters_per_class) + for k in range(n_clusters)] + + for i in range(n_samples - sum(n_samples_per_cluster)): + n_samples_per_cluster[i % n_clusters] += 1 + + # Initialize X and y + # X = cp.zeros(n_samples * n_features, dtype=dtype, order=order) + X = generator.randn(n_samples * n_features, dtype=dtype) + X = X.reshape((n_samples, n_features), order=order) + y = cp.zeros(n_samples, dtype=np.int) + + # Build the polytope whose vertices become cluster centroids + centroids = cp.array(_generate_hypercube(n_clusters, n_informative, + generator)).astype(dtype, copy=False) + centroids *= 2 * class_sep + centroids -= class_sep + if not hypercube: + centroids *= generator.rand(n_clusters, 1, dtype=dtype) + centroids *= 
generator.rand(1, n_informative, dtype=dtype) + + # Initially draw informative features from the standard normal + + # Create each cluster; a variant of make_blobs + if shuffle: + proba_samples_per_cluster = np.array(n_samples_per_cluster) / np.sum(n_samples_per_cluster) + shuffled_sample_indices = cp.array(np.random.choice(n_clusters, n_samples, replace=True, + p=proba_samples_per_cluster)) + for k, centroid in enumerate(centroids): + centroid_indices = cp.where(shuffled_sample_indices == k) + y[centroid_indices[0]] = k % n_classes + X_k = X[centroid_indices[0], :n_informative] # slice a view of the cluster + + A = 2 * generator.rand(n_informative, n_informative, dtype=dtype) - 1 + X_k = cp.dot(X_k, A) # introduce random covariance + + X_k += centroid # shift the cluster to a vertex + else: + stop = 0 + for k, centroid in enumerate(centroids): + start, stop = stop, stop + n_samples_per_cluster[k] + y[start:stop] = k % n_classes # assign labels + X_k = X[start:stop, :n_informative] # slice a view of the cluster + + A = 2 * generator.rand(n_informative, n_informative, dtype=dtype) - 1 + X_k[...] 
= cp.dot(X_k, A) # introduce random covariance + + X_k += centroid # shift the cluster to a vertex + + # Create redundant features + if n_redundant > 0: + B = 2 * generator.rand(n_informative, n_redundant, dtype=dtype) - 1 + X[:, n_informative:n_informative + n_redundant] = \ + cp.dot(X[:, :n_informative], B) + + # Repeat some features + if n_repeated > 0: + n = n_informative + n_redundant + indices = ((n - 1) * generator.rand(n_repeated, dtype=dtype) + 0.5).astype(np.intp) + X[:, n:n + n_repeated] = X[:, indices] + + # Randomly replace labels + if flip_y >= 0.0: + flip_mask = generator.rand(n_samples, dtype=dtype) < flip_y + y[flip_mask] = generator.randint(n_classes, size=int(flip_mask.sum())) + + # Randomly shift and scale + if shift is None: + shift = (2 * generator.rand(n_features, dtype=dtype) - 1) * class_sep + X += shift + + if scale is None: + scale = 1 + 100 * generator.rand(n_features, dtype=dtype) + X *= scale + + return X, y diff --git a/python/cuml/datasets/utils.py b/python/cuml/datasets/utils.py new file mode 100644 index 0000000000..bf72f04fa6 --- /dev/null +++ b/python/cuml/datasets/utils.py @@ -0,0 +1,31 @@ +# Copyright (c) 2020, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import cupy as cp + +def _create_rs_generator(random_state): + if hasattr(random_state, '__module__'): + rs_type = random_state.__module__ + '.' 
+ type(random_state).__name__ + else: + rs_type = type(random_state).__name__ + + rs = None + if rs_type == "NoneType" or rs_type == "int": + rs = cp.random.RandomState(seed=random_state) + elif rs_type == "cupy.random.generator.RandomState": + rs = rs_type + else: + raise ValueError('random_state type must be int or CuPy RandomState') + return rs \ No newline at end of file From a5a2d9e52f4a8e4f5c023c4df7249db8477142d3 Mon Sep 17 00:00:00 2001 From: Philip Hyunsu Cho Date: Fri, 10 Apr 2020 23:20:27 -0700 Subject: [PATCH 130/330] Make all Scipy imports conditional --- python/cuml/dask/common/dask_arr_utils.py | 11 +++++---- python/cuml/manifold/umap.pyx | 19 +++++++++++---- python/cuml/naive_bayes/naive_bayes.py | 23 +++++++++++++++---- python/cuml/preprocessing/label.py | 10 +++++--- .../cuml/test/dask/test_nearest_neighbors.py | 8 +++++-- python/cuml/test/test_arima.py | 7 +++++- python/cuml/test/test_label_binarizer.py | 9 ++++++-- python/cuml/test/test_metrics.py | 7 +++++- python/cuml/test/test_nearest_neighbors.py | 6 ++++- python/cuml/test/test_random_projection.py | 13 ++++++++++- 10 files changed, 88 insertions(+), 25 deletions(-) diff --git a/python/cuml/dask/common/dask_arr_utils.py b/python/cuml/dask/common/dask_arr_utils.py index edf47c9276..a3385ead09 100644 --- a/python/cuml/dask/common/dask_arr_utils.py +++ b/python/cuml/dask/common/dask_arr_utils.py @@ -15,7 +15,6 @@ from collections.abc import Iterable -import scipy.sparse import numpy as np import cupy as cp import cupyx @@ -31,7 +30,7 @@ from cuml.dask.common.part_utils import _extract_partitions -from cuml.utils import rmm_cupy_ary +from cuml.utils import rmm_cupy_ary, has_scipy from dask.distributed import wait from dask import delayed @@ -182,9 +181,11 @@ def to_sp_dask_array(cudf_or_array, client=None): return cudf_or_array else: - if scipy.sparse.isspmatrix(cudf_or_array): - cudf_or_array = \ - cupyx.scipy.sparse.csr_matrix(cudf_or_array.tocsr()) + if has_scipy(): + import scipy.sparse + if 
scipy.sparse.isspmatrix(cudf_or_array): + cudf_or_array = \ + cupyx.scipy.sparse.csr_matrix(cudf_or_array.tocsr()) elif cupyx.scipy.sparse.isspmatrix(cudf_or_array): pass elif isinstance(cudf_or_array, cudf.DataFrame): diff --git a/python/cuml/manifold/umap.pyx b/python/cuml/manifold/umap.pyx index 22987263e0..ce3b073179 100644 --- a/python/cuml/manifold/umap.pyx +++ b/python/cuml/manifold/umap.pyx @@ -32,16 +32,13 @@ import cupy import numba.cuda as cuda -from scipy.optimize import curve_fit - -from scipy.sparse import csr_matrix, coo_matrix, csc_matrix from cupy.sparse import csr_matrix as cp_csr_matrix,\ coo_matrix as cp_coo_matrix, csc_matrix as cp_csc_matrix from cuml.common.base import Base from cuml.common.handle cimport cumlHandle from cuml.utils import get_cudf_column_ptr, get_dev_array_ptr, \ - input_to_cuml_array, zeros, with_cupy_rmm + input_to_cuml_array, zeros, with_cupy_rmm, has_scipy from cuml.common.array import CumlArray import rmm @@ -410,6 +407,11 @@ class UMAP(Base): def curve(x, a, b): return 1.0 / (1.0 + a * x ** (2 * b)) + if has_scipy(): + from scipy.optimize import curve_fit + else: + raise RuntimeError('Scipy is needed to run find_ab_params') + xv = np.linspace(0, spread * 3, 300) yv = np.zeros(xv.shape) yv[xv < min_dist] = 1.0 @@ -419,6 +421,15 @@ class UMAP(Base): @with_cupy_rmm def _extract_knn_graph(self, knn_graph, convert_dtype=True): + if has_scipy(): + from scipy.sparse import csr_matrix, coo_matrix, csc_matrix + else: + class Dummy(object): + pass + csr_matrix = Dummy + coo_matrix = Dummy + csc_matrix = Dummy + if isinstance(knn_graph, (csc_matrix, cp_csc_matrix)): knn_graph = cupy.sparse.csr_matrix(knn_graph) n_samples = knn_graph.shape[0] diff --git a/python/cuml/naive_bayes/naive_bayes.py b/python/cuml/naive_bayes/naive_bayes.py index 9992f45811..b8df170818 100644 --- a/python/cuml/naive_bayes/naive_bayes.py +++ b/python/cuml/naive_bayes/naive_bayes.py @@ -19,7 +19,6 @@ import numpy as np import cupy as cp -import scipy.sparse 
import cupy.prof @@ -27,7 +26,7 @@ import warnings -from cuml.utils import cuda_kernel_factory +from cuml.utils import cuda_kernel_factory, has_scipy from cuml.prims.label import make_monotonic from cuml.prims.label import check_labels @@ -241,10 +240,14 @@ def fit(self, X, y, sample_weight=None): @cp.prof.TimeRangeDecorator(message="fit()", color_id=0) @with_cupy_rmm def _partial_fit(self, X, y, sample_weight=None, _classes=None): + if has_scipy(): + from scipy.sparse import isspmatrix as scipy_sparse_isspmatrix + else: + scipy_sparse_isspmatrix = lambda x : False if isinstance(X, np.ndarray) or isinstance(X, cp.ndarray): X = cp.asarray(X, X.dtype) - elif scipy.sparse.isspmatrix(X) or cp.sparse.isspmatrix(X): + elif scipy_sparse_isspmatrix(X) or cp.sparse.isspmatrix(X): X = X.tocoo() rows = cp.asarray(X.row, dtype=X.row.dtype) cols = cp.asarray(X.col, dtype=X.col.dtype) @@ -349,9 +352,14 @@ def predict(self, X): """ + if has_scipy(): + from scipy.sparse import isspmatrix as scipy_sparse_isspmatrix + else: + scipy_sparse_isspmatrix = lambda x : False + if isinstance(X, np.ndarray) or isinstance(X, cp.ndarray): X = cp.asarray(X, X.dtype) - elif scipy.sparse.isspmatrix(X) or cp.sparse.isspmatrix(X): + elif scipy_sparse_isspmatrix(X) or cp.sparse.isspmatrix(X): X = X.tocoo() rows = cp.asarray(X.row, dtype=X.row.dtype) cols = cp.asarray(X.col, dtype=X.col.dtype) @@ -385,9 +393,14 @@ def predict_log_proba(self, X): they appear in the attribute classes_. 
""" + if has_scipy(): + from scipy.sparse import isspmatrix as scipy_sparse_isspmatrix + else: + scipy_sparse_isspmatrix = lambda x : False + if isinstance(X, np.ndarray) or isinstance(X, cp.ndarray): X = cp.asarray(X, X.dtype) - elif scipy.sparse.isspmatrix(X) or cp.sparse.isspmatrix(X): + elif scipy_sparse_isspmatrix(X) or cp.sparse.isspmatrix(X): X = X.tocoo() rows = cp.asarray(X.row, dtype=X.row.dtype) cols = cp.asarray(X.col, dtype=X.col.dtype) diff --git a/python/cuml/preprocessing/label.py b/python/cuml/preprocessing/label.py index dbd53e7d14..434bc0f68e 100644 --- a/python/cuml/preprocessing/label.py +++ b/python/cuml/preprocessing/label.py @@ -13,13 +13,12 @@ # limitations under the License. # -import scipy import cupy as cp from cuml.prims.label import make_monotonic, check_labels, \ invert_labels -from cuml.utils import rmm_cupy_ary +from cuml.utils import rmm_cupy_ary, has_scipy def label_binarize(y, classes, neg_label=0, pos_label=1, @@ -226,10 +225,15 @@ def inverse_transform(self, y, threshold=None): arr : array with original labels """ + if has_scipy(): + from scipy.sparse import isspmatrix as scipy_sparse_isspmatrix + else: + scipy_sparse_isspmatrix = lambda x : False + # If we are already given multi-class, just return it. 
if cp.sparse.isspmatrix(y): y_mapped = y.tocsr().indices.astype(self.classes_.dtype) - elif scipy.sparse.isspmatrix(y): + elif scipy_sparse_isspmatrix(y): y = y.tocsr() y_mapped = rmm_cupy_ary(cp.array, y.indices, dtype=y.indices.dtype) diff --git a/python/cuml/test/dask/test_nearest_neighbors.py b/python/cuml/test/dask/test_nearest_neighbors.py index c906364c67..5e5681b4e9 100644 --- a/python/cuml/test/dask/test_nearest_neighbors.py +++ b/python/cuml/test/dask/test_nearest_neighbors.py @@ -18,10 +18,10 @@ import dask_cudf import pandas as pd -import scipy.stats as stats - import numpy as np +from cuml.utils import has_scipy + from cuml.dask.common import utils as dask_utils from dask.distributed import Client, wait @@ -37,6 +37,10 @@ def predict(neigh_ind, _y, n_neighbors): + if has_scipy(): + import scipy.stats as stats + else: + raise RuntimeError('Scipy is needed to run predict()') neigh_ind = neigh_ind.astype(np.int64) diff --git a/python/cuml/test/test_arima.py b/python/cuml/test/test_arima.py index 3b8fef0d7b..55c96ec35e 100644 --- a/python/cuml/test/test_arima.py +++ b/python/cuml/test/test_arima.py @@ -40,11 +40,11 @@ import warnings import pandas as pd -from scipy.optimize.optimize import _approx_fprime_helper import statsmodels.api as sm import cudf import cuml.tsa.arima as arima +from cuml.utils import has_scipy ############################################################################### @@ -363,6 +363,11 @@ def test_gradient(test_case, dtype): """Test batched gradient implementation against scipy non-batched gradient. Note: it doesn't test that the loglikelihood is correct! 
""" + if has_scipy(): + from scipy.optimize.optimize import _approx_fprime_helper + else: + pytest.skip('Skipping test_gradient because Scipy is missing') + key, data = test_case order, seasonal_order, intercept = extract_order(key) p, _, q = order diff --git a/python/cuml/test/test_label_binarizer.py b/python/cuml/test/test_label_binarizer.py index 4f3987f6c6..c60011c35f 100644 --- a/python/cuml/test/test_label_binarizer.py +++ b/python/cuml/test/test_label_binarizer.py @@ -15,14 +15,13 @@ import pytest from cuml.preprocessing import LabelBinarizer from cuml.test.utils import array_equal +from cuml.utils import has_scipy from sklearn.preprocessing import LabelBinarizer as skLB import numpy as np import cupy as cp -import scipy.sparse - @pytest.mark.parametrize( "labels", [([1, 4, 5, 2, 0, 1, 6, 2, 3, 4], @@ -53,6 +52,12 @@ def test_basic_functions(labels, dtype, sparse_output): if sparse_output: skl_bin_xformed = skl_bin.transform(xform_labels.get()) + if has_scipy(): + import scipy.sparse + else: + pytest.skip('Skipping test_basic_functions(sparse_output=True) ' + + 'because Scipy is missing') + skl_csr = scipy.sparse.coo_matrix(skl_bin_xformed).tocsr() cuml_csr = xformed diff --git a/python/cuml/test/test_metrics.py b/python/cuml/test/test_metrics.py index 024ae4e6ab..100a681488 100644 --- a/python/cuml/test/test_metrics.py +++ b/python/cuml/test/test_metrics.py @@ -41,7 +41,7 @@ from sklearn.metrics.regression import mean_absolute_error as sklearn_mae from sklearn.metrics.regression import mean_squared_log_error as sklearn_msle -from scipy.stats import entropy as sp_entropy +from cuml.utils import has_scipy @pytest.mark.parametrize('datatype', [np.float32, np.float64]) @@ -319,6 +319,11 @@ def test_entropy(use_handle): @pytest.mark.parametrize('base', [None, 2, 10, 50]) @pytest.mark.parametrize('use_handle', [True, False]) def test_entropy_random(n_samples, base, use_handle): + if has_scipy(): + from scipy.stats import entropy as sp_entropy + else: + 
pytest.skip('Skipping test_entropy_random because Scipy is missing') + handle, stream = get_handle(use_handle) clustering, _ = \ diff --git a/python/cuml/test/test_nearest_neighbors.py b/python/cuml/test/test_nearest_neighbors.py index 45cdbff7a0..519fa365e9 100644 --- a/python/cuml/test/test_nearest_neighbors.py +++ b/python/cuml/test/test_nearest_neighbors.py @@ -28,10 +28,11 @@ import pandas as pd import numpy as np -import scipy.stats as stats +from cuml.utils import has_scipy def predict(neigh_ind, _y, n_neighbors): + import scipy.stats as stats neigh_ind = neigh_ind.astype(np.int32) @@ -46,6 +47,9 @@ def predict(neigh_ind, _y, n_neighbors): @pytest.mark.parametrize("n_clusters", [2, 10]) def test_neighborhood_predictions(nrows, ncols, n_neighbors, n_clusters, datatype): + if not has_scipy(): + pytest.skip('Skipping test_neighborhood_predictions because ' + + 'Scipy is missing') X, y = make_blobs(n_samples=nrows, centers=n_clusters, n_features=ncols, random_state=0) diff --git a/python/cuml/test/test_random_projection.py b/python/cuml/test/test_random_projection.py index 5ff7ddf5ff..dd2f1d9473 100644 --- a/python/cuml/test/test_random_projection.py +++ b/python/cuml/test/test_random_projection.py @@ -25,7 +25,7 @@ as sklearn_johnson_lindenstrauss_min_dim from sklearn.datasets.samples_generator import make_blobs -from scipy.spatial.distance import pdist +from cuml.utils import has_scipy @pytest.mark.parametrize('datatype', [np.float32, np.float64]) @@ -53,6 +53,12 @@ def test_random_projection_fit(datatype, method): @pytest.mark.parametrize('datatype', [np.float32, np.float64]) @pytest.mark.parametrize('method', ['gaussian', 'sparse']) def test_random_projection_fit_transform(datatype, method): + if has_scipy(): + from scipy.spatial.distance import pdist + else: + pytest.skip('Skipping test_random_projection_fit_transform because ' + + 'Scipy is missing') + eps = 0.2 # dataset generation @@ -96,6 +102,11 @@ def test_johnson_lindenstrauss_min_dim(): 
@pytest.mark.parametrize('datatype', [np.float32, np.float64]) @pytest.mark.parametrize('method', ['sparse']) def test_random_projection_fit_transform_default(datatype, method): + if has_scipy(): + from scipy.spatial.distance import pdist + else: + pytest.skip('Skipping test_random_projection_fit_transform_default ' + + 'because Scipy is missing') eps = 0.8 # dataset generation From 5aa907dbc38fe133abcc28a484c624e90e043605 Mon Sep 17 00:00:00 2001 From: Philip Hyunsu Cho Date: Sat, 11 Apr 2020 19:59:49 -0700 Subject: [PATCH 131/330] Fix test test_to_sp_dask_array --- python/cuml/dask/common/dask_arr_utils.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/python/cuml/dask/common/dask_arr_utils.py b/python/cuml/dask/common/dask_arr_utils.py index a3385ead09..b0f13ca817 100644 --- a/python/cuml/dask/common/dask_arr_utils.py +++ b/python/cuml/dask/common/dask_arr_utils.py @@ -182,10 +182,13 @@ def to_sp_dask_array(cudf_or_array, client=None): else: if has_scipy(): - import scipy.sparse - if scipy.sparse.isspmatrix(cudf_or_array): - cudf_or_array = \ - cupyx.scipy.sparse.csr_matrix(cudf_or_array.tocsr()) + from scipy.sparse import isspmatrix as scipy_sparse_isspmatrix + else: + def scipy_sparse_isspmatrix(x): + return False + if scipy_sparse_isspmatrix(cudf_or_array): + cudf_or_array = \ + cupyx.scipy.sparse.csr_matrix(cudf_or_array.tocsr()) elif cupyx.scipy.sparse.isspmatrix(cudf_or_array): pass elif isinstance(cudf_or_array, cudf.DataFrame): From 3e741759eb2a250a068fb3c33f610367b6bc28e1 Mon Sep 17 00:00:00 2001 From: Philip Hyunsu Cho Date: Sat, 11 Apr 2020 20:01:42 -0700 Subject: [PATCH 132/330] Follow style convention --- python/cuml/naive_bayes/naive_bayes.py | 9 ++++++--- python/cuml/preprocessing/label.py | 3 ++- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/python/cuml/naive_bayes/naive_bayes.py b/python/cuml/naive_bayes/naive_bayes.py index b8df170818..2d515e2f30 100644 --- a/python/cuml/naive_bayes/naive_bayes.py 
+++ b/python/cuml/naive_bayes/naive_bayes.py @@ -243,7 +243,8 @@ def _partial_fit(self, X, y, sample_weight=None, _classes=None): if has_scipy(): from scipy.sparse import isspmatrix as scipy_sparse_isspmatrix else: - scipy_sparse_isspmatrix = lambda x : False + def scipy_sparse_isspmatrix(x): + return False if isinstance(X, np.ndarray) or isinstance(X, cp.ndarray): X = cp.asarray(X, X.dtype) @@ -355,7 +356,8 @@ def predict(self, X): if has_scipy(): from scipy.sparse import isspmatrix as scipy_sparse_isspmatrix else: - scipy_sparse_isspmatrix = lambda x : False + def scipy_sparse_isspmatrix(x): + return False if isinstance(X, np.ndarray) or isinstance(X, cp.ndarray): X = cp.asarray(X, X.dtype) @@ -396,7 +398,8 @@ def predict_log_proba(self, X): if has_scipy(): from scipy.sparse import isspmatrix as scipy_sparse_isspmatrix else: - scipy_sparse_isspmatrix = lambda x : False + def scipy_sparse_isspmatrix(x): + return False if isinstance(X, np.ndarray) or isinstance(X, cp.ndarray): X = cp.asarray(X, X.dtype) diff --git a/python/cuml/preprocessing/label.py b/python/cuml/preprocessing/label.py index 434bc0f68e..859af96601 100644 --- a/python/cuml/preprocessing/label.py +++ b/python/cuml/preprocessing/label.py @@ -228,7 +228,8 @@ def inverse_transform(self, y, threshold=None): if has_scipy(): from scipy.sparse import isspmatrix as scipy_sparse_isspmatrix else: - scipy_sparse_isspmatrix = lambda x : False + def scipy_sparse_isspmatrix(x): + return False # If we are already given multi-class, just return it. 
if cp.sparse.isspmatrix(y): From 6fbdfee18b354ae81197dcb7bcd4d061243584d2 Mon Sep 17 00:00:00 2001 From: Philip Hyunsu Cho Date: Sat, 11 Apr 2020 20:02:31 -0700 Subject: [PATCH 133/330] Update Changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2c7b7f75ea..e4092856f6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -31,6 +31,7 @@ - PR #2028: Skipping UMAP knn reproducibility tests until we figure out why its failing in CUDA 10.2 - PR #2024: Fixed cuda-memcheck errors with sample-without-replacement prim - PR #1540: prims: support for custom math-type used for computation inside adjusted rand index prim +- PR #2059: Make all Scipy imports conditional # cuML 0.13.0 (Date TBD) From 96e32a7f879fdcae1c33f02df4bc2046dcc4f7b3 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Mon, 13 Apr 2020 10:38:12 -0400 Subject: [PATCH 134/330] Adding reproducible random state to umap mnmg tests --- python/cuml/test/dask/test_umap.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/python/cuml/test/dask/test_umap.py b/python/cuml/test/dask/test_umap.py index a44dc28de9..ef21f54595 100644 --- a/python/cuml/test/dask/test_umap.py +++ b/python/cuml/test/dask/test_umap.py @@ -36,11 +36,14 @@ def test_umap_mnmg(n_parts, sampling_ratio, supervised, dataset, cluster): from cuml.manifold import UMAP from cuml.dask.manifold import UMAP as MNMG_UMAP - n_neighbors = 10 + n_neighbors = 500 + + print("Dataset: " + str(dataset)) if dataset == "make_blobs": local_X, local_y = make_blobs(n_samples=10000, n_features=10, - centers=200, cluster_std=0.1) + centers=200, cluster_std=0.8, + shuffle=True, random_state=42) else: if dataset == "digits": from sklearn.datasets import load_digits @@ -51,12 +54,14 @@ def test_umap_mnmg(n_parts, sampling_ratio, supervised, dataset, cluster): def umap_mnmg_trustworthiness(): n_samples = local_X.shape[0] - n_sampling = int(n_samples * sampling_ratio) n_samples_per_part = 
int(n_samples / n_parts) - local_model = UMAP(n_neighbors=n_neighbors) + local_model = UMAP(n_neighbors=n_neighbors, + random_state=42) - selection = np.random.choice(n_samples, n_sampling) + selection = np.random.RandomState(42).choice( + [True, False], n_samples, replace=True, + p=[sampling_ratio, 1.0-sampling_ratio]) X_train = local_X[selection] X_transform = local_X[~selection] X_transform_d = da.from_array(X_transform, @@ -75,7 +80,8 @@ def umap_mnmg_trustworthiness(): return trustworthiness(X_transform, embedding, n_neighbors) def local_umap_trustworthiness(): - local_model = UMAP(n_neighbors=n_neighbors) + local_model = UMAP(n_neighbors=n_neighbors, + random_state=42) local_model.fit(local_X, local_y) embedding = local_model.transform(local_X) return trustworthiness(local_X, embedding, n_neighbors) From 47e3e6cf7f9d5f3e86cf09217d4854b83be77475 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Mon, 13 Apr 2020 10:40:45 -0400 Subject: [PATCH 135/330] Updating changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4a97c2cb1c..2bbd910984 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,7 @@ - PR #2031: Encapsulating UCX-py interactions in singleton - PR #2029: Add C++ ARIMA log-likelihood benchmark - PR #2058: Use CumlArray in Random Projection +- PR #2062: Adding random state to UMAP mnmg tests ## Bug Fixes - PR #1939: Fix syntax error in cuml.common.array From cb21fa3a2993a18b25feab7ebb8294937cc187a6 Mon Sep 17 00:00:00 2001 From: "Corey J. 
Nolet" Date: Mon, 13 Apr 2020 11:09:55 -0400 Subject: [PATCH 136/330] Adding validation function for umap hyperparams --- cpp/src/umap/init_embed/spectral_algo.h | 9 +++++---- python/cuml/manifold/umap.pyx | 7 +++++++ 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/cpp/src/umap/init_embed/spectral_algo.h b/cpp/src/umap/init_embed/spectral_algo.h index d4441abe91..2d18bc4eb2 100644 --- a/cpp/src/umap/init_embed/spectral_algo.h +++ b/cpp/src/umap/init_embed/spectral_algo.h @@ -57,20 +57,21 @@ void launcher(const cumlHandle &handle, const T *X, int n, int d, coo->nnz, n, params->n_components, tmp_storage.data()); - MLCommon::LinAlg::transpose(tmp_storage.data(), embedding, n, - params->n_components, - handle.getImpl().getCublasHandle(), stream); - MLCommon::LinAlg::unaryOp( tmp_storage.data(), tmp_storage.data(), n * params->n_components, [=] __device__(T input) { return fabsf(input); }, stream); + MLCommon::LinAlg::transpose(tmp_storage.data(), embedding, n, + params->n_components, + handle.getImpl().getCublasHandle(), stream); + thrust::device_ptr d_ptr = thrust::device_pointer_cast(tmp_storage.data()); T max = *(thrust::max_element(thrust::cuda::par.on(stream), d_ptr, d_ptr + (n * params->n_components))); uint64_t seed = params->random_state; + // Reuse tmp_storage to add random noise MLCommon::Random::Rng r(seed); r.normal(tmp_storage.data(), n * params->n_components, 0.0f, 0.0001f, stream); diff --git a/python/cuml/manifold/umap.pyx b/python/cuml/manifold/umap.pyx index 22987263e0..8708bd1d0c 100644 --- a/python/cuml/manifold/umap.pyx +++ b/python/cuml/manifold/umap.pyx @@ -355,6 +355,13 @@ class UMAP(Base): self.X_m = None self.embedding_ = None + self.validate_hyperparams() + + def validate_hyperparams(self): + + if self.min_dist > self.spread: + raise ValueError("min_dist should be <= spread") + @staticmethod def _build_umap_params(cls): cdef UMAPParams* umap_params = new UMAPParams() From caae00dff3a58d7dffd531282e2e670ce788e37d Mon Sep 17 
00:00:00 2001 From: "Corey J. Nolet" Date: Mon, 13 Apr 2020 11:12:34 -0400 Subject: [PATCH 137/330] Fixing trivial typo in comms --- cpp/comms/std/src/cuML_std_comms_impl.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/comms/std/src/cuML_std_comms_impl.cpp b/cpp/comms/std/src/cuML_std_comms_impl.cpp index d37e5aef84..36233a92bd 100644 --- a/cpp/comms/std/src/cuML_std_comms_impl.cpp +++ b/cpp/comms/std/src/cuML_std_comms_impl.cpp @@ -237,7 +237,7 @@ cumlStdCommunicator_impl::~cumlStdCommunicator_impl() { CUDA_CHECK_NO_THROW(cudaFree(_sendbuff)); CUDA_CHECK_NO_THROW(cudaFree(_recvbuff)); -#ifndef WITH_UCX +#ifdef WITH_UCX close_ucp_handle((struct comms_ucp_handle *)_ucp_handle); #endif } From 97ed04ee0287e45f8b8e728a5c2c97266a93bc50 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Mon, 13 Apr 2020 11:38:05 -0400 Subject: [PATCH 138/330] Adding more nan checks in umap tests --- python/cuml/test/test_umap.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/python/cuml/test/test_umap.py b/python/cuml/test/test_umap.py index cc8858303f..5445188370 100644 --- a/python/cuml/test/test_umap.py +++ b/python/cuml/test/test_umap.py @@ -71,6 +71,9 @@ def test_umap_fit_transform_score(nrows, n_feats): embedding = model.fit_transform(data) cuml_embedding = cuml_model.fit_transform(data, convert_dtype=True) + assert not np.isnan(embedding).any() + assert not np.isnan(cuml_embedding).any() + if nrows < 500000: cuml_score = adjusted_rand_score(labels, KMeans(10).fit_predict( @@ -126,6 +129,9 @@ def test_umap_transform_on_iris(): fitter.fit(data, convert_dtype=True) new_data = iris.data[~iris_selection] embedding = fitter.transform(new_data, convert_dtype=True) + + assert not np.isnan(embedding).any() + trust = trustworthiness(new_data, embedding, 10) assert trust >= 0.85 From 9c92d47f780ee3e1cbdd904c7e9c85eea1de6c3e Mon Sep 17 00:00:00 2001 From: Thejaswi Rao Date: Mon, 13 Apr 2020 11:33:41 -0700 Subject: [PATCH 139/330] FEA cython wrapper for 
Logger method calls --- python/cuml/common/logger.pyx | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 python/cuml/common/logger.pyx diff --git a/python/cuml/common/logger.pyx b/python/cuml/common/logger.pyx new file mode 100644 index 0000000000..7e38646fed --- /dev/null +++ b/python/cuml/common/logger.pyx @@ -0,0 +1,30 @@ + +# cython: profile=False +# distutils: language = c++ +# cython: embedsignature = True +# cython: language_level = 3 + + +from libcpp.string cimport string + + +cdef extern from "cuml/common/logger.hpp" namespace "ML" nogil: + cdef cppclass Logger: + pass + + +def set_level(level): + Logger::get().setLevel(level) + + +def set_pattern(pattern): + cdef string s = pattern + Logger::get().setPattern(s) + + +def should_log_for(level): + return Logger::get().shouldLogFor(level) + + +def get_pattern(): + return Logger::get().getPattern() From 6749755e36095844f03f9c5c1a0edc344e6148a2 Mon Sep 17 00:00:00 2001 From: Thejaswi Rao Date: Mon, 13 Apr 2020 11:34:16 -0700 Subject: [PATCH 140/330] DOC added copyright header --- python/cuml/common/logger.pyx | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/python/cuml/common/logger.pyx b/python/cuml/common/logger.pyx index 7e38646fed..628c2cea11 100644 --- a/python/cuml/common/logger.pyx +++ b/python/cuml/common/logger.pyx @@ -1,3 +1,18 @@ +# +# Copyright (c) 2020, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# # cython: profile=False # distutils: language = c++ From 5ceac22890ac1a4b10848161e1a7097cf0e1d36c Mon Sep 17 00:00:00 2001 From: Thejaswi Rao Date: Mon, 13 Apr 2020 11:36:02 -0700 Subject: [PATCH 141/330] BUG return the string with proper decode --- python/cuml/common/logger.pyx | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/cuml/common/logger.pyx b/python/cuml/common/logger.pyx index 628c2cea11..226a6a6228 100644 --- a/python/cuml/common/logger.pyx +++ b/python/cuml/common/logger.pyx @@ -42,4 +42,5 @@ def should_log_for(level): def get_pattern(): - return Logger::get().getPattern() + cdef string s = Logger::get().getPattern() + return s.decode("UTF-8") From c0dc5252c2c8d094b542feae6ad188e9cbbfd128 Mon Sep 17 00:00:00 2001 From: Thejaswi Rao Date: Mon, 13 Apr 2020 11:47:13 -0700 Subject: [PATCH 142/330] BUG fixed multiple cython declaration related issues --- python/cuml/common/logger.pyx | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/python/cuml/common/logger.pyx b/python/cuml/common/logger.pyx index 226a6a6228..d8cb90f836 100644 --- a/python/cuml/common/logger.pyx +++ b/python/cuml/common/logger.pyx @@ -21,26 +21,32 @@ from libcpp.string cimport string +from libcpp cimport bool cdef extern from "cuml/common/logger.hpp" namespace "ML" nogil: cdef cppclass Logger: - pass + @staticmethod + Logger& get() + void setLevel(int level) + void setPattern(const string& pattern) + bool shouldLogFor(int level) const + string getPattern() const def set_level(level): - Logger::get().setLevel(level) + Logger.get().setLevel(level) def set_pattern(pattern): cdef string s = pattern - Logger::get().setPattern(s) + Logger.get().setPattern(s) def should_log_for(level): - return Logger::get().shouldLogFor(level) + return Logger.get().shouldLogFor(level) def get_pattern(): - cdef string s = Logger::get().getPattern() + cdef string s = Logger.get().getPattern() return s.decode("UTF-8") From 
8c54ba8b59d58103e0ca0605c2fd6aa16e3600e1 Mon Sep 17 00:00:00 2001 From: Thejaswi Rao Date: Mon, 13 Apr 2020 11:52:59 -0700 Subject: [PATCH 143/330] FEA exposed logging methods in cython --- python/cuml/common/logger.pyx | 38 +++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/python/cuml/common/logger.pyx b/python/cuml/common/logger.pyx index d8cb90f836..a7c8039000 100644 --- a/python/cuml/common/logger.pyx +++ b/python/cuml/common/logger.pyx @@ -33,6 +33,14 @@ cdef extern from "cuml/common/logger.hpp" namespace "ML" nogil: bool shouldLogFor(int level) const string getPattern() const +cdef extern from "cuml/common/logger.hpp" nogil: + void CUML_LOG_TRACE(const char* fmt, ...) + void CUML_LOG_DEBUG(const char* fmt, ...) + void CUML_LOG_INFO(const char* fmt, ...) + void CUML_LOG_WARN(const char* fmt, ...) + void CUML_LOG_EEROR(const char* fmt, ...) + void CUML_LOG_CRITICAL(const char* fmt, ...) + def set_level(level): Logger.get().setLevel(level) @@ -50,3 +58,33 @@ def should_log_for(level): def get_pattern(): cdef string s = Logger.get().getPattern() return s.decode("UTF-8") + + +def trace(msg): + cdef string s = msg + CUML_LOG_TRACE(s.c_str()) + + +def debug(msg): + cdef string s = msg + CUML_LOG_DEBUG(s.c_str()) + + +def info(msg): + cdef string s = msg + CUML_LOG_INFO(s.c_str()) + + +def warn(msg): + cdef string s = msg + CUML_LOG_WARN(s.c_str()) + + +def error(msg): + cdef string s = msg + CUML_LOG_ERROR(s.c_str()) + + +def critical(msg): + cdef string s = msg + CUML_LOG_CRITICAL(s.c_str()) From 293b6729aa3fa0e0c73c8ee22a05f63d4f6d5ec5 Mon Sep 17 00:00:00 2001 From: Thejaswi Rao Date: Mon, 13 Apr 2020 11:59:19 -0700 Subject: [PATCH 144/330] DOC changelog update --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index cabc241658..252dcf797f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ ## New Features - PR #1867: C++: add logging interface support in cuML based spdlog - PR 
#1906: UMAP MNMG +- PR #2067L python: wrap logging interface in cython ## Improvements - PR #1931: C++: enabled doxygen docs for all of the C++ codebase From dde0f800f3890ff6de4412aa1aefe2a57c63a3e4 Mon Sep 17 00:00:00 2001 From: Thejaswi Rao Date: Mon, 13 Apr 2020 12:00:44 -0700 Subject: [PATCH 145/330] BUG fixed a typo with cdef extern declaration --- python/cuml/common/logger.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cuml/common/logger.pyx b/python/cuml/common/logger.pyx index a7c8039000..b829307f95 100644 --- a/python/cuml/common/logger.pyx +++ b/python/cuml/common/logger.pyx @@ -38,7 +38,7 @@ cdef extern from "cuml/common/logger.hpp" nogil: void CUML_LOG_DEBUG(const char* fmt, ...) void CUML_LOG_INFO(const char* fmt, ...) void CUML_LOG_WARN(const char* fmt, ...) - void CUML_LOG_EEROR(const char* fmt, ...) + void CUML_LOG_ERROR(const char* fmt, ...) void CUML_LOG_CRITICAL(const char* fmt, ...) From 16d067bf09af15a2e29f0707928c9491b742fabc Mon Sep 17 00:00:00 2001 From: "Corey J. 
Nolet" Date: Mon, 13 Apr 2020 15:13:20 -0400 Subject: [PATCH 146/330] Only closing ucp handle when UCX was initialized on the comms --- cpp/comms/std/src/cuML_std_comms_impl.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/cpp/comms/std/src/cuML_std_comms_impl.cpp b/cpp/comms/std/src/cuML_std_comms_impl.cpp index 36233a92bd..828ba30740 100644 --- a/cpp/comms/std/src/cuML_std_comms_impl.cpp +++ b/cpp/comms/std/src/cuML_std_comms_impl.cpp @@ -220,7 +220,8 @@ cumlStdCommunicator_impl::cumlStdCommunicator_impl( cumlStdCommunicator_impl::cumlStdCommunicator_impl(ncclComm_t comm, int size, int rank, bool verbose) - : _nccl_comm(comm), _size(size), _rank(rank), _verbose(verbose) { + : _nccl_comm(comm), _size(size), _rank(rank), _verbose(verbose), + _ucp_worker(nullptr), _ucp_handle(nullptr), _ucp_eps(nullptr){ initialize(); } @@ -238,10 +239,14 @@ cumlStdCommunicator_impl::~cumlStdCommunicator_impl() { CUDA_CHECK_NO_THROW(cudaFree(_recvbuff)); #ifdef WITH_UCX - close_ucp_handle((struct comms_ucp_handle *)_ucp_handle); + if( _ucp_worker != nullptr) { + close_ucp_handle((struct comms_ucp_handle *)_ucp_handle); + } #endif } + + int cumlStdCommunicator_impl::getSize() const { return _size; } int cumlStdCommunicator_impl::getRank() const { return _rank; } From 02055dcd4b66095018dc9013066e2f38e971a589 Mon Sep 17 00:00:00 2001 From: "Corey J. 
Nolet" Date: Mon, 13 Apr 2020 15:28:08 -0400 Subject: [PATCH 147/330] Updating cpp style for comms --- cpp/comms/std/src/cuML_std_comms_impl.cpp | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/cpp/comms/std/src/cuML_std_comms_impl.cpp b/cpp/comms/std/src/cuML_std_comms_impl.cpp index 828ba30740..ff04e66806 100644 --- a/cpp/comms/std/src/cuML_std_comms_impl.cpp +++ b/cpp/comms/std/src/cuML_std_comms_impl.cpp @@ -220,8 +220,13 @@ cumlStdCommunicator_impl::cumlStdCommunicator_impl( cumlStdCommunicator_impl::cumlStdCommunicator_impl(ncclComm_t comm, int size, int rank, bool verbose) - : _nccl_comm(comm), _size(size), _rank(rank), _verbose(verbose), - _ucp_worker(nullptr), _ucp_handle(nullptr), _ucp_eps(nullptr){ + : _nccl_comm(comm), + _size(size), + _rank(rank), + _verbose(verbose), + _ucp_worker(nullptr), + _ucp_handle(nullptr), + _ucp_eps(nullptr) { initialize(); } @@ -239,14 +244,12 @@ cumlStdCommunicator_impl::~cumlStdCommunicator_impl() { CUDA_CHECK_NO_THROW(cudaFree(_recvbuff)); #ifdef WITH_UCX - if( _ucp_worker != nullptr) { + if (_ucp_worker != nullptr) { close_ucp_handle((struct comms_ucp_handle *)_ucp_handle); } #endif } - - int cumlStdCommunicator_impl::getSize() const { return _size; } int cumlStdCommunicator_impl::getRank() const { return _rank; } From c50760a7839994ec924787651adf6e5337a83820 Mon Sep 17 00:00:00 2001 From: "Corey J. 
Nolet" Date: Mon, 13 Apr 2020 17:35:03 -0400 Subject: [PATCH 148/330] Using make_monotonic for computation of probabilities --- cpp/src_prims/selection/knn.h | 87 +++++------------------------------ 1 file changed, 12 insertions(+), 75 deletions(-) diff --git a/cpp/src_prims/selection/knn.h b/cpp/src_prims/selection/knn.h index 60a4bf7ef7..7059f0bdeb 100644 --- a/cpp/src_prims/selection/knn.h +++ b/cpp/src_prims/selection/knn.h @@ -19,6 +19,7 @@ #include "cuda_utils.h" #include "distance/distance.h" +#include "label/classlabels.h" #include #include @@ -303,61 +304,9 @@ void brute_force_knn(float **input, int *sizes, int n_params, IntType D, translations); } -/** - * @brief Binary tree recursion for finding a label in the unique_labels array. - * This provides a good middle-ground between having to create a new - * labels array just to map non-monotonically increasing labels, or - * the alternative, which is having to search over O(n) space for the labels - * array in each thread. This is going to cause warp divergence of log(n) - * per iteration. 
- * @param unique_labels array of unique labels - * @param n_labels number of unique labels - * @param target_val the label value to search for in unique_labels - */ -template -__device__ int label_binary_search(IdxType *unique_labels, IdxType n_labels, - IdxType target_val) { - int out_label_idx = -1; - - int level = 1; - int cur_break_idx = round(n_labels / (2.0 * level)); - while (out_label_idx == -1) { - int cur_cached_label = unique_labels[cur_break_idx]; - - // If we found our label, terminate - if (cur_cached_label == target_val) { - return cur_break_idx; - - // check left neighbor - } else if (cur_break_idx > 0 && - unique_labels[cur_break_idx - 1] == target_val) { - return cur_break_idx - 1; - - // check right neighbor - } else if (cur_break_idx < n_labels - 1 && - unique_labels[cur_break_idx + 1] == target_val) { - return cur_break_idx + 1; - - // traverse - } else { - level += 1; - - int subtree = round(n_labels / (2.0 * level)); - if (target_val < cur_cached_label) { - // take left subtree - cur_break_idx -= subtree; - } else { - // take right subtree - cur_break_idx += subtree; - } - } - } - return -1; -} - template __global__ void class_probs_kernel(OutType *out, const int64_t *knn_indices, - const int *labels, int *unique_labels, + const int *labels, int n_uniq_labels, size_t n_samples, int n_neighbors) { int row = (blockIdx.x * blockDim.x) + threadIdx.x; @@ -365,27 +314,12 @@ __global__ void class_probs_kernel(OutType *out, const int64_t *knn_indices, float n_neigh_inv = 1.0f / n_neighbors; - extern __shared__ int label_cache[]; - for (int j = threadIdx.x; j < n_uniq_labels; j += blockDim.x) { - label_cache[j] = unique_labels[j]; - } - - __syncthreads(); - if (row >= n_samples) return; for (int j = 0; j < n_neighbors; j++) { int64_t neighbor_idx = knn_indices[i + j]; int out_label = labels[neighbor_idx]; - - // Trading off warp divergence in the outputs so that we don't - // need to copy / modify the label memory to do these mappings. 
- // Found a middle-ground between between using shared memory - // for the mappings. - int out_label_idx = - label_binary_search(label_cache, n_uniq_labels, out_label); - - int out_idx = row * n_uniq_labels + out_label_idx; + int out_idx = row * n_uniq_labels + out_label; out[out_idx] += n_neigh_inv; } } @@ -401,6 +335,7 @@ __global__ void class_vote_kernel(OutType *out, const float *class_proba, extern __shared__ int label_cache[]; for (int j = threadIdx.x; j < n_uniq_labels; j += blockDim.x) { label_cache[j] = unique_labels[j]; + printf("Label: %d\n", label_cache[j]); } __syncthreads(); @@ -409,9 +344,9 @@ __global__ void class_vote_kernel(OutType *out, const float *class_proba, float cur_max = -1.0; int cur_label = -1; for (int j = 0; j < n_uniq_labels; j++) { - float cur_count = class_proba[i + j]; - if (cur_count > cur_max) { - cur_max = cur_count; + float cur_proba = class_proba[i + j]; + if (cur_proba > cur_max) { + cur_max = cur_proba; cur_label = j; } } @@ -482,9 +417,11 @@ void class_probs(std::vector &out, const int64_t *knn_indices, * Build array of class probability arrays from * knn_indices and labels */ - int smem = sizeof(int) * n_labels; - class_probs_kernel<<>>( - out[i], knn_indices, y[i], uniq_labels[i], n_labels, n_rows, k); + device_buffer y_normalized(allocator, stream, n_rows); + MLCommon::Label::make_monotonic(y_normalized.data(), y[i], n_rows, stream); + + class_probs_kernel<<>>( + out[i], knn_indices, y_normalized.data(), n_labels, n_rows, k); CUDA_CHECK(cudaPeekAtLastError()); } } From df5e2b304cf409be32fe71e904fd0bc4bb42a4a7 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Mon, 13 Apr 2020 18:25:36 -0400 Subject: [PATCH 149/330] Subtracting one from make_monotonic results. 
--- cpp/src/knn/knn.cu | 2 ++ cpp/src_prims/selection/knn.h | 6 ++++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/cpp/src/knn/knn.cu b/cpp/src/knn/knn.cu index c6d43ea29e..4c1c876441 100644 --- a/cpp/src/knn/knn.cu +++ b/cpp/src/knn/knn.cu @@ -59,6 +59,8 @@ void knn_classify(cumlHandle &handle, int *out, int64_t *knn_indices, for (int i = 0; i < y.size(); i++) { MLCommon::Label::getUniqueLabels(y[i], n_samples, &(uniq_labels[i]), &(n_unique[i]), stream, d_alloc); + + std::cout << MLCommon::arr2Str(uniq_labels[i], n_unique[i], "unique_labels", stream) << std::endl; } MLCommon::Selection::knn_classify(out, knn_indices, y, n_samples, k, diff --git a/cpp/src_prims/selection/knn.h b/cpp/src_prims/selection/knn.h index 7059f0bdeb..3f816deecf 100644 --- a/cpp/src_prims/selection/knn.h +++ b/cpp/src_prims/selection/knn.h @@ -335,7 +335,7 @@ __global__ void class_vote_kernel(OutType *out, const float *class_proba, extern __shared__ int label_cache[]; for (int j = threadIdx.x; j < n_uniq_labels; j += blockDim.x) { label_cache[j] = unique_labels[j]; - printf("Label: %d\n", label_cache[j]); + printf("Label: %d=%d\n", j, label_cache[j]); } __syncthreads(); @@ -419,7 +419,9 @@ void class_probs(std::vector &out, const int64_t *knn_indices, */ device_buffer y_normalized(allocator, stream, n_rows); MLCommon::Label::make_monotonic(y_normalized.data(), y[i], n_rows, stream); - + MLCommon::LinAlg::unaryOp( + y_normalized.data(), y_normalized.data(), n_rows, [] __device__(int input) { return input -1; }, + stream); class_probs_kernel<<>>( out[i], knn_indices, y_normalized.data(), n_labels, n_rows, k); CUDA_CHECK(cudaPeekAtLastError()); From fa69752cda2c2dc3e9c4ea85e8d20a140ff37701 Mon Sep 17 00:00:00 2001 From: "Corey J. 
Nolet" Date: Mon, 13 Apr 2020 18:26:52 -0400 Subject: [PATCH 150/330] Updating cpp style --- cpp/src/knn/knn.cu | 4 +++- cpp/src_prims/selection/knn.h | 9 ++++----- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/cpp/src/knn/knn.cu b/cpp/src/knn/knn.cu index 4c1c876441..5896c8e955 100644 --- a/cpp/src/knn/knn.cu +++ b/cpp/src/knn/knn.cu @@ -60,7 +60,9 @@ void knn_classify(cumlHandle &handle, int *out, int64_t *knn_indices, MLCommon::Label::getUniqueLabels(y[i], n_samples, &(uniq_labels[i]), &(n_unique[i]), stream, d_alloc); - std::cout << MLCommon::arr2Str(uniq_labels[i], n_unique[i], "unique_labels", stream) << std::endl; + std::cout << MLCommon::arr2Str(uniq_labels[i], n_unique[i], "unique_labels", + stream) + << std::endl; } MLCommon::Selection::knn_classify(out, knn_indices, y, n_samples, k, diff --git a/cpp/src_prims/selection/knn.h b/cpp/src_prims/selection/knn.h index 3f816deecf..a3c2ba1d92 100644 --- a/cpp/src_prims/selection/knn.h +++ b/cpp/src_prims/selection/knn.h @@ -306,9 +306,8 @@ void brute_force_knn(float **input, int *sizes, int n_params, IntType D, template __global__ void class_probs_kernel(OutType *out, const int64_t *knn_indices, - const int *labels, - int n_uniq_labels, size_t n_samples, - int n_neighbors) { + const int *labels, int n_uniq_labels, + size_t n_samples, int n_neighbors) { int row = (blockIdx.x * blockDim.x) + threadIdx.x; int i = row * n_neighbors; @@ -420,8 +419,8 @@ void class_probs(std::vector &out, const int64_t *knn_indices, device_buffer y_normalized(allocator, stream, n_rows); MLCommon::Label::make_monotonic(y_normalized.data(), y[i], n_rows, stream); MLCommon::LinAlg::unaryOp( - y_normalized.data(), y_normalized.data(), n_rows, [] __device__(int input) { return input -1; }, - stream); + y_normalized.data(), y_normalized.data(), n_rows, + [] __device__(int input) { return input - 1; }, stream); class_probs_kernel<<>>( out[i], knn_indices, y_normalized.data(), n_labels, n_rows, k); 
CUDA_CHECK(cudaPeekAtLastError()); From 2fdfffa52cd3227a7e31cdfcc31e286b24a422b0 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Mon, 13 Apr 2020 18:27:49 -0400 Subject: [PATCH 151/330] Updating changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5481f6b395..e2454c09ee 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,7 @@ - PR #2031: Encapsulating UCX-py interactions in singleton - PR #2029: Add C++ ARIMA log-likelihood benchmark - PR #2058: Use CumlArray in Random Projection +- PR #2068: Updating knn class probabilities to use make_monotonic instead of binary search ## Bug Fixes - PR #1939: Fix syntax error in cuml.common.array From 520b10b7a0dea676589eca00ebd81c9fe499b4ac Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Mon, 13 Apr 2020 18:35:32 -0400 Subject: [PATCH 152/330] Removing debug prints --- cpp/src/knn/knn.cu | 4 ---- cpp/src_prims/selection/knn.h | 1 - 2 files changed, 5 deletions(-) diff --git a/cpp/src/knn/knn.cu b/cpp/src/knn/knn.cu index 5896c8e955..c6d43ea29e 100644 --- a/cpp/src/knn/knn.cu +++ b/cpp/src/knn/knn.cu @@ -59,10 +59,6 @@ void knn_classify(cumlHandle &handle, int *out, int64_t *knn_indices, for (int i = 0; i < y.size(); i++) { MLCommon::Label::getUniqueLabels(y[i], n_samples, &(uniq_labels[i]), &(n_unique[i]), stream, d_alloc); - - std::cout << MLCommon::arr2Str(uniq_labels[i], n_unique[i], "unique_labels", - stream) - << std::endl; } MLCommon::Selection::knn_classify(out, knn_indices, y, n_samples, k, diff --git a/cpp/src_prims/selection/knn.h b/cpp/src_prims/selection/knn.h index a3c2ba1d92..6bd097e622 100644 --- a/cpp/src_prims/selection/knn.h +++ b/cpp/src_prims/selection/knn.h @@ -334,7 +334,6 @@ __global__ void class_vote_kernel(OutType *out, const float *class_proba, extern __shared__ int label_cache[]; for (int j = threadIdx.x; j < n_uniq_labels; j += blockDim.x) { label_cache[j] = unique_labels[j]; - printf("Label: %d=%d\n", j, label_cache[j]); } 
__syncthreads(); From c4752d8c6e4bc09c65b00d80b8eba01196bd5a3f Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Mon, 13 Apr 2020 19:09:56 -0400 Subject: [PATCH 153/330] Lowering n_neighbors. --- python/cuml/test/dask/test_umap.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cuml/test/dask/test_umap.py b/python/cuml/test/dask/test_umap.py index ef21f54595..dd18b3c95a 100644 --- a/python/cuml/test/dask/test_umap.py +++ b/python/cuml/test/dask/test_umap.py @@ -36,7 +36,7 @@ def test_umap_mnmg(n_parts, sampling_ratio, supervised, dataset, cluster): from cuml.manifold import UMAP from cuml.dask.manifold import UMAP as MNMG_UMAP - n_neighbors = 500 + n_neighbors = 10 print("Dataset: " + str(dataset)) From 9367f31b0fb920dc634370d81a5d9dbadf28529e Mon Sep 17 00:00:00 2001 From: divyegala Date: Mon, 13 Apr 2020 18:51:32 -0500 Subject: [PATCH 154/330] Initial MG make classification --- python/cuml/__init__.py | 1 + python/cuml/datasets/__init__.py | 1 + python/cuml/datasets/classification.py | 45 +++++++++++++++++--------- python/cuml/test/utils.py | 2 +- 4 files changed, 32 insertions(+), 17 deletions(-) diff --git a/python/cuml/__init__.py b/python/cuml/__init__.py index ea126297f5..dafe86022c 100644 --- a/python/cuml/__init__.py +++ b/python/cuml/__init__.py @@ -23,6 +23,7 @@ from cuml.datasets.blobs import blobs as make_blobs from cuml.datasets.regression import make_regression +from cuml.datasets.classification import make_classification from cuml.decomposition.pca import PCA from cuml.decomposition.tsvd import TruncatedSVD diff --git a/python/cuml/datasets/__init__.py b/python/cuml/datasets/__init__.py index dfbd8b216b..2596972dd2 100644 --- a/python/cuml/datasets/__init__.py +++ b/python/cuml/datasets/__init__.py @@ -16,3 +16,4 @@ from cuml.datasets.blobs import blobs as make_blobs from cuml.datasets.regression import make_regression +from cuml.datasets.classification import make_classification diff --git 
a/python/cuml/datasets/classification.py b/python/cuml/datasets/classification.py index 37da62581e..e0e97fa5e7 100644 --- a/python/cuml/datasets/classification.py +++ b/python/cuml/datasets/classification.py @@ -13,16 +13,13 @@ # limitations under the License. # -from sklearn.utils.random import sample_without_replacement from cuml.datasets.utils import _create_rs_generator from cuml.utils import with_cupy_rmm import cupy as cp import numpy as np -from time import sleep - -def _generate_hypercube(samples, dimensions, rng): +def _generate_hypercube(samples, dimensions, rng, dtype): """Returns distinct binary samples of length dimensions """ if dimensions > 30: @@ -31,7 +28,7 @@ def _generate_hypercube(samples, dimensions, rng): out = np.random.choice(2 ** dimensions, samples, replace=False).astype(dtype='>u4', copy=False) out = np.unpackbits(out.view('>u1')).reshape((-1, 32))[:, -dimensions:] - return out + return cp.array(out).astype(dtype, copy=False) @with_cupy_rmm @@ -40,7 +37,9 @@ def make_classification(n_samples=100, n_features=20, n_informative=2, n_clusters_per_class=2, weights=None, flip_y=0.01, class_sep=1.0, hypercube=True, shift=0.0, scale=1.0, shuffle=True, random_state=None, order='F', - dtype='float32'): + dtype='float32', _centroids=None, + _informative_covariance=None, + _redundant_covariance=None, _repeated_indices=None): """Generate a random n-class classification problem. 
This initially creates clusters of points normally distributed (std=1) about vertices of an ``n_informative``-dimensional hypercube with sides of @@ -177,15 +176,17 @@ def make_classification(n_samples=100, n_features=20, n_informative=2, y = cp.zeros(n_samples, dtype=np.int) # Build the polytope whose vertices become cluster centroids - centroids = cp.array(_generate_hypercube(n_clusters, n_informative, - generator)).astype(dtype, copy=False) + if _centroids is None: + centroids = _generate_hypercube(n_clusters, n_informative, + generator, dtype) + else: + centroids = _centroids + centroids *= 2 * class_sep centroids -= class_sep if not hypercube: - centroids *= generator.rand(n_clusters, 1, dtype=dtype) - centroids *= generator.rand(1, n_informative, dtype=dtype) - - # Initially draw informative features from the standard normal + centroids *= generator.rand(samples, 1, dtype=dtype) + centroids *= generator.rand(1, dimensions, dtype=dtype) # Create each cluster; a variant of make_blobs if shuffle: @@ -197,7 +198,10 @@ def make_classification(n_samples=100, n_features=20, n_informative=2, y[centroid_indices[0]] = k % n_classes X_k = X[centroid_indices[0], :n_informative] # slice a view of the cluster - A = 2 * generator.rand(n_informative, n_informative, dtype=dtype) - 1 + if _informative_covariance is None: + A = 2 * generator.rand(n_informative, n_informative, dtype=dtype) - 1 + else: + A = _informative_covariance[k] X_k = cp.dot(X_k, A) # introduce random covariance X_k += centroid # shift the cluster to a vertex @@ -208,21 +212,30 @@ def make_classification(n_samples=100, n_features=20, n_informative=2, y[start:stop] = k % n_classes # assign labels X_k = X[start:stop, :n_informative] # slice a view of the cluster - A = 2 * generator.rand(n_informative, n_informative, dtype=dtype) - 1 + if _informative_covariance is None: + A = 2 * generator.rand(n_informative, n_informative, dtype=dtype) - 1 + else: + A = _informative_covariance[k] X_k[...] 
= cp.dot(X_k, A) # introduce random covariance X_k += centroid # shift the cluster to a vertex # Create redundant features if n_redundant > 0: - B = 2 * generator.rand(n_informative, n_redundant, dtype=dtype) - 1 + if _redundant_covariance is None: + B = 2 * generator.rand(n_informative, n_redundant, dtype=dtype) - 1 + else: + B = _redundant_covariance X[:, n_informative:n_informative + n_redundant] = \ cp.dot(X[:, :n_informative], B) # Repeat some features if n_repeated > 0: n = n_informative + n_redundant - indices = ((n - 1) * generator.rand(n_repeated, dtype=dtype) + 0.5).astype(np.intp) + if _repeated_indices is None: + indices = ((n - 1) * generator.rand(n_repeated, dtype=dtype) + 0.5).astype(np.intp) + else: + indices = _repeated_indices X[:, n:n + n_repeated] = X[:, indices] # Randomly replace labels diff --git a/python/cuml/test/utils.py b/python/cuml/test/utils.py index 3dc721ec99..7cbec33c8e 100644 --- a/python/cuml/test/utils.py +++ b/python/cuml/test/utils.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018, NVIDIA CORPORATION. +# Copyright (c) 2020, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. From 605bab03c9fc6de5dbd2b581d4c8c205840c1645 Mon Sep 17 00:00:00 2001 From: divyegala Date: Mon, 13 Apr 2020 18:52:00 -0500 Subject: [PATCH 155/330] More MG classification --- python/cuml/dask/datasets/classification.py | 196 ++++++++++++++++++++ 1 file changed, 196 insertions(+) create mode 100644 python/cuml/dask/datasets/classification.py diff --git a/python/cuml/dask/datasets/classification.py b/python/cuml/dask/datasets/classification.py new file mode 100644 index 0000000000..bd3134744a --- /dev/null +++ b/python/cuml/dask/datasets/classification.py @@ -0,0 +1,196 @@ +# Copyright (c) 2020, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from cuml.datasets.classification import _generate_hypercube +from cuml.datasets.classification import make_classification as sg_make_classification +from cuml.datasets.utils import _create_rs_generator +from cuml.utils import with_cupy_rmm + +from dask.distributed import default_client, wait + +import cupy as cp +import numpy as np +import math + +from time import sleep + +def _create_covariance(*args, rs, dtype='float32'): + return 2 * rs.rand(*args, dtype=dtype) - 1 + + +@with_cupy_rmm +def make_classification(n_samples=100, n_features=20, n_informative=2, + n_redundant=2, n_repeated=0, n_classes=2, + n_clusters_per_class=2, weights=None, flip_y=0.01, + class_sep=1.0, hypercube=True, shift=0.0, scale=1.0, + shuffle=True, random_state=None, order='F', + dtype='float32', n_parts=None): + """Generate a random n-class classification problem. + This initially creates clusters of points normally distributed (std=1) + about vertices of an ``n_informative``-dimensional hypercube with sides of + length ``2*class_sep`` and assigns an equal number of clusters to each + class. It introduces interdependence between these features and adds + various types of further noise to the data. + Without shuffling, ``X`` horizontally stacks features in the following + order: the primary ``n_informative`` features, followed by ``n_redundant`` + linear combinations of the informative features, followed by ``n_repeated`` + duplicates, drawn randomly with replacement from the informative and + redundant features. The remaining features are filled with random noise. 
+ Thus, without shuffling, all useful features are contained in the columns + ``X[:, :n_informative + n_redundant + n_repeated]``. + Read more in the :ref:`User Guide `. + Parameters + ---------- + n_samples : int, optional (default=100) + The number of samples. + n_features : int, optional (default=20) + The total number of features. These comprise ``n_informative`` + informative features, ``n_redundant`` redundant features, + ``n_repeated`` duplicated features and + ``n_features-n_informative-n_redundant-n_repeated`` useless features + drawn at random. + n_informative : int, optional (default=2) + The number of informative features. Each class is composed of a number + of gaussian clusters each located around the vertices of a hypercube + in a subspace of dimension ``n_informative``. For each cluster, + informative features are drawn independently from N(0, 1) and then + randomly linearly combined within each cluster in order to add + covariance. The clusters are then placed on the vertices of the + hypercube. + n_redundant : int, optional (default=2) + The number of redundant features. These features are generated as + random linear combinations of the informative features. + n_repeated : int, optional (default=0) + The number of duplicated features, drawn randomly from the informative + and the redundant features. + n_classes : int, optional (default=2) + The number of classes (or labels) of the classification problem. + n_clusters_per_class : int, optional (default=2) + The number of clusters per class. + weights : array-like of shape (n_classes,) or (n_classes - 1,),\ + (default=None) + The proportions of samples assigned to each class. If None, then + classes are balanced. Note that if ``len(weights) == n_classes - 1``, + then the last class weight is automatically inferred. + More than ``n_samples`` samples may be returned if the sum of + ``weights`` exceeds 1. 
+ flip_y : float, optional (default=0.01) + The fraction of samples whose class is assigned randomly. Larger + values introduce noise in the labels and make the classification + task harder. + class_sep : float, optional (default=1.0) + The factor multiplying the hypercube size. Larger values spread + out the clusters/classes and make the classification task easier. + hypercube : boolean, optional (default=True) + If True, the clusters are put on the vertices of a hypercube. If + False, the clusters are put on the vertices of a random polytope. + shift : float, array of shape [n_features] or None, optional (default=0.0) + Shift features by the specified value. If None, then features + are shifted by a random value drawn in [-class_sep, class_sep]. + scale : float, array of shape [n_features] or None, optional (default=1.0) + Multiply features by the specified value. If None, then features + are scaled by a random value drawn in [1, 100]. Note that scaling + happens after shifting. + shuffle : boolean, optional (default=True) + Shuffle the samples and the features. + random_state : int, RandomState instance or None (default) + Determines random number generation for dataset creation. Pass an int + for reproducible output across multiple function calls. + See :term:`Glossary `. + order: str, optional (default='F') + The order of the generated samples + dtype : str, optional (default='float32') + Dtype of the generated samples + n_parts : int (default = None) + number of partitions to generate (this can be greater + than the number of workers) + Returns + ------- + X : device array of shape [n_samples, n_features] + The generated samples. + y : device array of shape [n_samples] + The integer labels for class membership of each sample. + Notes + ----- + The algorithm is adapted from Guyon [1] and was designed to generate + the "Madelon" dataset. + References + ---------- + .. [1] I. Guyon, "Design of experiments for the NIPS 2003 variable + selection benchmark", 2003. 
+ """ + + client = default_client() + + rs = _create_rs_generator(random_state) + + workers = list(client.has_what().keys()) + + n_parts = n_parts if n_parts is not None else len(workers) + parts_workers = (workers * n_parts)[:n_parts] + print(parts_workers) + rows_per_part = math.ceil(n_samples / n_parts) + + n_clusters = n_classes * n_clusters_per_class + + # create centroids + centroids = _generate_hypercube(n_clusters, n_informative, rs, dtype) + + # # create covariance matrices + informative_covariance_local = rs.rand(n_clusters, n_informative, n_informative, dtype=dtype) + informative_covariance = client.scatter(informative_covariance_local, workers=workers) + del informative_covariance_local + + redundant_covariance_local = rs.rand(n_informative, n_redundant, dtype=dtype) + redundant_covariance = client.scatter(redundant_covariance_local, workers=workers) + del redundant_covariance_local + + wait([informative_covariance, redundant_covariance]) + print(client.has_what()) + + # repeated indices + n = n_informative + n_redundant + repeated_indices = ((n - 1) * rs.rand(n_repeated, dtype=dtype) + 0.5).astype(np.intp) + + # scale and shift + if shift is None: + shift = (2 * rs.rand(n_features, dtype=dtype) - 1) * class_sep + + if scale is None: + scale = 1 + 100 * rs.rand(n_features, dtype=dtype) + + # Create arrays on each worker (gpu) + parts = [] + worker_rows = [] + rows_so_far = 0 + for idx, worker in enumerate(parts_workers): + if rows_so_far + rows_per_part <= n_samples: + rows_so_far += rows_per_part + worker_rows.append(rows_per_part) + else: + worker_rows.append((int(n_samples) - rows_so_far)) + + print(parts_workers) + parts = [client.submit(sg_make_classification, worker_rows[i], n_features, + n_informative, n_redundant, n_repeated, n_classes, + n_clusters_per_class, weights, flip_y, + class_sep, hypercube, shift, scale, + shuffle, random_state, order, dtype, + centroids, informative_covariance, redundant_covariance, + repeated_indices, + pure=False, + 
workers=[parts_workers[i]]) for i in range(len(parts_workers))] + + wait(parts) \ No newline at end of file From 82b8ca73e4278a047bf3d0b5eb93dec8123ae66c Mon Sep 17 00:00:00 2001 From: Philip Hyunsu Cho Date: Mon, 13 Apr 2020 18:37:20 -0700 Subject: [PATCH 156/330] Address reviewer feedback --- python/cuml/dask/common/dask_arr_utils.py | 2 +- python/cuml/manifold/umap.pyx | 9 ++++----- python/cuml/naive_bayes/naive_bayes.py | 12 ++++++------ python/cuml/preprocessing/label.py | 4 ++-- python/cuml/utils/import_utils.py | 8 ++++++++ 5 files changed, 21 insertions(+), 14 deletions(-) diff --git a/python/cuml/dask/common/dask_arr_utils.py b/python/cuml/dask/common/dask_arr_utils.py index 21e06e8fa2..3327adc2c3 100644 --- a/python/cuml/dask/common/dask_arr_utils.py +++ b/python/cuml/dask/common/dask_arr_utils.py @@ -27,7 +27,7 @@ from cuml.dask.common.part_utils import _extract_partitions -from cuml.utils import rmm_cupy_ary, has_scipy +from cuml.utils import rmm_cupy_ary def validate_dask_array(darray, client=None): diff --git a/python/cuml/manifold/umap.pyx b/python/cuml/manifold/umap.pyx index ce3b073179..f9b986c920 100644 --- a/python/cuml/manifold/umap.pyx +++ b/python/cuml/manifold/umap.pyx @@ -424,11 +424,10 @@ class UMAP(Base): if has_scipy(): from scipy.sparse import csr_matrix, coo_matrix, csc_matrix else: - class Dummy(object): - pass - csr_matrix = Dummy - coo_matrix = Dummy - csc_matrix = Dummy + from cuml.utils.import_utils import DummyClass + csr_matrix = DummyClass + coo_matrix = DummyClass + csc_matrix = DummyClass if isinstance(knn_graph, (csc_matrix, cp_csc_matrix)): knn_graph = cupy.sparse.csr_matrix(knn_graph) diff --git a/python/cuml/naive_bayes/naive_bayes.py b/python/cuml/naive_bayes/naive_bayes.py index 2d515e2f30..b2d7c6c9ac 100644 --- a/python/cuml/naive_bayes/naive_bayes.py +++ b/python/cuml/naive_bayes/naive_bayes.py @@ -243,8 +243,8 @@ def _partial_fit(self, X, y, sample_weight=None, _classes=None): if has_scipy(): from scipy.sparse import 
isspmatrix as scipy_sparse_isspmatrix else: - def scipy_sparse_isspmatrix(x): - return False + from cuml.utils.import_utils import dummy_function_always_false \ + as scipy_sparse_isspmatrix if isinstance(X, np.ndarray) or isinstance(X, cp.ndarray): X = cp.asarray(X, X.dtype) @@ -356,8 +356,8 @@ def predict(self, X): if has_scipy(): from scipy.sparse import isspmatrix as scipy_sparse_isspmatrix else: - def scipy_sparse_isspmatrix(x): - return False + from cuml.utils.import_utils import dummy_function_always_false \ + as scipy_sparse_isspmatrix if isinstance(X, np.ndarray) or isinstance(X, cp.ndarray): X = cp.asarray(X, X.dtype) @@ -398,8 +398,8 @@ def predict_log_proba(self, X): if has_scipy(): from scipy.sparse import isspmatrix as scipy_sparse_isspmatrix else: - def scipy_sparse_isspmatrix(x): - return False + from cuml.utils.import_utils import dummy_function_always_false \ + as scipy_sparse_isspmatrix if isinstance(X, np.ndarray) or isinstance(X, cp.ndarray): X = cp.asarray(X, X.dtype) diff --git a/python/cuml/preprocessing/label.py b/python/cuml/preprocessing/label.py index 859af96601..98b4dadd02 100644 --- a/python/cuml/preprocessing/label.py +++ b/python/cuml/preprocessing/label.py @@ -228,8 +228,8 @@ def inverse_transform(self, y, threshold=None): if has_scipy(): from scipy.sparse import isspmatrix as scipy_sparse_isspmatrix else: - def scipy_sparse_isspmatrix(x): - return False + from cuml.utils.import_utils import dummy_function_always_false \ + as scipy_sparse_isspmatrix # If we are already given multi-class, just return it. 
if cp.sparse.isspmatrix(y): diff --git a/python/cuml/utils/import_utils.py b/python/cuml/utils/import_utils.py index c8a8c1a58a..8d0bda9a98 100644 --- a/python/cuml/utils/import_utils.py +++ b/python/cuml/utils/import_utils.py @@ -104,3 +104,11 @@ def has_scipy(): return True except ImportError: return False + + +def dummy_function_always_false(*args, **kwargs): + return False + + +class DummyClass(object): + pass From 62bcee2e04bc45a7e973db38995fef347e136c63 Mon Sep 17 00:00:00 2001 From: Philip Hyunsu Cho Date: Mon, 13 Apr 2020 18:40:56 -0700 Subject: [PATCH 157/330] Update _conv_array_to_sparse() --- python/cuml/dask/common/dask_arr_utils.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/python/cuml/dask/common/dask_arr_utils.py b/python/cuml/dask/common/dask_arr_utils.py index 3327adc2c3..a83265f46a 100644 --- a/python/cuml/dask/common/dask_arr_utils.py +++ b/python/cuml/dask/common/dask_arr_utils.py @@ -52,7 +52,12 @@ def _conv_array_to_sparse(arr): dense numpy or cupy array :return: cupy sparse CSR matrix """ - if scipy.sparse.isspmatrix(arr): + if has_scipy(): + from scipy.sparse import isspmatrix as scipy_sparse_isspmatrix + else: + from cuml.utils.import_utils import dummy_function_always_false \ + as scipy_sparse_isspmatrix + if scipy_sparse_isspmatrix(arr): ret = \ cupyx.scipy.sparse.csr_matrix(arr.tocsr()) elif cupyx.scipy.sparse.isspmatrix(arr): From eda04a08c81d1907b02313a9dbaba6522483560d Mon Sep 17 00:00:00 2001 From: Philip Hyunsu Cho Date: Mon, 13 Apr 2020 18:43:38 -0700 Subject: [PATCH 158/330] Fix typo --- python/cuml/dask/common/dask_arr_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cuml/dask/common/dask_arr_utils.py b/python/cuml/dask/common/dask_arr_utils.py index a83265f46a..b143299a06 100644 --- a/python/cuml/dask/common/dask_arr_utils.py +++ b/python/cuml/dask/common/dask_arr_utils.py @@ -27,7 +27,7 @@ from cuml.dask.common.part_utils import _extract_partitions -from cuml.utils 
import rmm_cupy_ary +from cuml.utils import rmm_cupy_ary, has_scipy def validate_dask_array(darray, client=None): From 5726a5010997ed392785dcd01372a09eae031548 Mon Sep 17 00:00:00 2001 From: Thejaswi Rao Date: Mon, 13 Apr 2020 20:28:55 -0700 Subject: [PATCH 159/330] FEA exposed logging levels in cython --- python/cuml/common/logger.pyx | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/python/cuml/common/logger.pyx b/python/cuml/common/logger.pyx index b829307f95..cc10659f5d 100644 --- a/python/cuml/common/logger.pyx +++ b/python/cuml/common/logger.pyx @@ -41,6 +41,23 @@ cdef extern from "cuml/common/logger.hpp" nogil: void CUML_LOG_ERROR(const char* fmt, ...) void CUML_LOG_CRITICAL(const char* fmt, ...) + cdef int CUML_LEVEL_TRACE + cdef int CUML_LEVEL_DEBUG + cdef int CUML_LEVEL_INFO + cdef int CUML_LEVEL_WARN + cdef int CUML_LEVEL_ERROR + cdef int CUML_LEVEL_CRITICAL + cdef int CUML_LEVEL_OFF + + +LEVEL_TRACE = CUML_LEVEL_TRACE +LEVEL_DEBUG = CUML_LEVEL_DEBUG +LEVEL_INFO = CUML_LEVEL_INFO +LEVEL_WARN = CUML_LEVEL_WARN +LEVEL_ERROR = CUML_LEVEL_ERROR +LEVEL_CRITICAL = CUML_LEVEL_CRITICAL +LEVEL_OFF = CUML_LEVEL_OFF + def set_level(level): Logger.get().setLevel(level) From b96871b3ccf1d79f7db531f39a70046d6d3f9e3f Mon Sep 17 00:00:00 2001 From: Thejaswi Rao Date: Mon, 13 Apr 2020 20:43:40 -0700 Subject: [PATCH 160/330] DOC added docstrings for set_level, should_log_for, get_pattern and set_pattern --- python/cuml/common/logger.pyx | 85 +++++++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) diff --git a/python/cuml/common/logger.pyx b/python/cuml/common/logger.pyx index cc10659f5d..11d7b62c49 100644 --- a/python/cuml/common/logger.pyx +++ b/python/cuml/common/logger.pyx @@ -50,29 +50,114 @@ cdef extern from "cuml/common/logger.hpp" nogil: cdef int CUML_LEVEL_OFF +"""Enables all log messages upto and including `trace()`""" LEVEL_TRACE = CUML_LEVEL_TRACE + +"""Enables all log messages upto and including `debug()`""" LEVEL_DEBUG = 
CUML_LEVEL_DEBUG + +"""Enables all log messages upto and including `info()`""" LEVEL_INFO = CUML_LEVEL_INFO + +"""Enables all log messages upto and including `warn()`""" LEVEL_WARN = CUML_LEVEL_WARN + +"""Enables all log messages upto and include `error()`""" LEVEL_ERROR = CUML_LEVEL_ERROR + +"""Enables only `critical()` messages""" LEVEL_CRITICAL = CUML_LEVEL_CRITICAL + +"""Disables all log messages""" LEVEL_OFF = CUML_LEVEL_OFF def set_level(level): + """ + Set logging level. This setting will be persistent from here onwards until + the end of the process, if left unchanged afterwards. + + Examples + -------- + + .. code-block:: python + + # To enable all log messages upto and including `info()` + import cuml.common.logger as logger + logger.set_level(logger.LEVEL_INFO) + + Parameters + ---------- + level : int + Logging level to be set. It must be one of cuml.common.logger.LEVEL_* + """ Logger.get().setLevel(level) def set_pattern(pattern): + """ + Set the logging pattern. This setting will be persistent from here onwards + until the end of the process, if left unchanged afterwards. + + Examples + -------- + + .. code-block:: python + + import cuml.common.logger as logger + logger.set_pattern("--> [%H-%M-%S] %v") + + Parameters + ---------- + pattern : str + Logging pattern string. Refer to this wiki page for its syntax: + https://github.com/gabime/spdlog/wiki/3.-Custom-formatting + """ cdef string s = pattern Logger.get().setPattern(s) def should_log_for(level): + """ + Check if messages at the given logging level will be logged or not. This is + a useful check to avoid doing unnecessary logging work. + + Examples + -------- + + .. code-block:: python + + import cuml.common.logger as logger + if logger.should_log_for(LEVEL_INFO): + # which could waste precious CPU cycles + my_message = construct_message() + logger.info(my_message) + + Parameters + ---------- + level : int + Logging level to be set. 
It must be one of cuml.common.logger.LEVEL_* + """ return Logger.get().shouldLogFor(level) def get_pattern(): + """ + Returns the current logging pattern. Useful in case one is temporarily + changing the pattern, like in a method. + + Examples + -------- + + .. code-block:: python + + import cuml.common.logger as logger + def some_func(new_patt): + old_patt = logger.get_pattern() + logger.set_pattern(new_patt) + do_work() + logger.set_pattern(old_patt) + """ cdef string s = Logger.get().getPattern() return s.decode("UTF-8") From 46e3d60bd058fc7069fe6974f3b3e12c0a7d5e49 Mon Sep 17 00:00:00 2001 From: Thejaswi Rao Date: Mon, 13 Apr 2020 20:46:08 -0700 Subject: [PATCH 161/330] DOC added docstrings for the rest of methods in logger --- python/cuml/common/logger.pyx | 96 +++++++++++++++++++++++++++++++++++ 1 file changed, 96 insertions(+) diff --git a/python/cuml/common/logger.pyx b/python/cuml/common/logger.pyx index 11d7b62c49..7cbeb1fc75 100644 --- a/python/cuml/common/logger.pyx +++ b/python/cuml/common/logger.pyx @@ -163,30 +163,126 @@ def get_pattern(): def trace(msg): + """ + Logs a trace message, if it is enabled. + + Examples + -------- + + .. code-block:: python + + import cuml.common.logger as logger + logger.trace("Hello world! This is a trace message") + + Parameters + ---------- + msg : str + Message to be logged. + """ cdef string s = msg CUML_LOG_TRACE(s.c_str()) def debug(msg): + """ + Logs a debug message, if it is enabled. + + Examples + -------- + + .. code-block:: python + + import cuml.common.logger as logger + logger.debug("Hello world! This is a debug message") + + Parameters + ---------- + msg : str + Message to be logged. + """ cdef string s = msg CUML_LOG_DEBUG(s.c_str()) def info(msg): + """ + Logs an info message, if it is enabled. + + Examples + -------- + + .. code-block:: python + + import cuml.common.logger as logger + logger.info("Hello world! This is a info message") + + Parameters + ---------- + msg : str + Message to be logged. 
+ """ cdef string s = msg CUML_LOG_INFO(s.c_str()) def warn(msg): + """ + Logs a warning message, if it is enabled. + + Examples + -------- + + .. code-block:: python + + import cuml.common.logger as logger + logger.warn("Hello world! This is a warning message") + + Parameters + ---------- + msg : str + Message to be logged. + """ cdef string s = msg CUML_LOG_WARN(s.c_str()) def error(msg): + """ + Logs an error message, if it is enabled. + + Examples + -------- + + .. code-block:: python + + import cuml.common.logger as logger + logger.error("Hello world! This is a error message") + + Parameters + ---------- + msg : str + Message to be logged. + """ cdef string s = msg CUML_LOG_ERROR(s.c_str()) def critical(msg): + """ + Logs a critical message, if it is enabled. + + Examples + -------- + + .. code-block:: python + + import cuml.common.logger as logger + logger.critical("Hello world! This is a critical message") + + Parameters + ---------- + msg : str + Message to be logged. + """ cdef string s = msg CUML_LOG_CRITICAL(s.c_str()) From 671313d16ccba919c65199c0be99e0d2f262b49a Mon Sep 17 00:00:00 2001 From: Thejaswi Rao Date: Mon, 13 Apr 2020 20:51:13 -0700 Subject: [PATCH 162/330] FEA updated Base class to expose verbosity level --- python/cuml/common/base.pyx | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/python/cuml/common/base.pyx b/python/cuml/common/base.pyx index cb169ea102..d621a594cb 100644 --- a/python/cuml/common/base.pyx +++ b/python/cuml/common/base.pyx @@ -58,7 +58,8 @@ class Base: .. code-block:: python def __init__(...) - super(KMeans, self).__init__(handle, verbose, output_type) + super(KMeans, self).__init__(handle, verbose, verbosity, + output_type) # initialize numeric variables @@ -105,7 +106,9 @@ class Base: handles in several streams. If it is None, a new one is created just for this class. verbose : bool - Whether to print debug spews + Whether to print debug spews. + verbosity : int + Sets logging level. 
It must be one of `cuml.common.logger.LEVEL_*`. output_type : {'input', 'cudf', 'cupy', 'numpy'}, optional Variable to control output type of the results and attributes of the estimators. If None, it'll inherit the output type set at the @@ -158,7 +161,8 @@ class Base: del base # optional! """ - def __init__(self, handle=None, verbose=False, output_type=None): + def __init__(self, handle=None, verbose=False, + verbosity=cuml.common.logger.LEVEL_INFO, output_type=None): """ Constructor. All children must call init method of this base class. @@ -172,7 +176,7 @@ class Base: # integer logging-level argument, remove `self.verbose` and have all # algos in python layer accept an integer logging level instead of # the current boolean param - self.logging_level = 1 if verbose else 2 + self.logging_level = verbosity self.output_type = cuml.global_output_type if output_type is None \ else _check_output_type_str(output_type) From 9d17ceb83e04c267133d2ea2d5592300bc7fe253 Mon Sep 17 00:00:00 2001 From: Thejaswi Rao Date: Mon, 13 Apr 2020 20:51:52 -0700 Subject: [PATCH 163/330] DOC added deprecation notice for verbose flag in Base class --- python/cuml/common/base.pyx | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/cuml/common/base.pyx b/python/cuml/common/base.pyx index d621a594cb..767f86a2e3 100644 --- a/python/cuml/common/base.pyx +++ b/python/cuml/common/base.pyx @@ -106,7 +106,8 @@ class Base: handles in several streams. If it is None, a new one is created just for this class. verbose : bool - Whether to print debug spews. + Whether to print debug spews. (This will be deprecated once we have the + verbosity flag updated across all algos) verbosity : int Sets logging level. It must be one of `cuml.common.logger.LEVEL_*`. 
output_type : {'input', 'cudf', 'cupy', 'numpy'}, optional From 3fa7ba6e371382263411a787f6b2366f78625d7d Mon Sep 17 00:00:00 2001 From: Thejaswi Rao Date: Mon, 13 Apr 2020 20:53:18 -0700 Subject: [PATCH 164/330] FIX simplified imports in base class for logger --- python/cuml/common/base.pyx | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/python/cuml/common/base.pyx b/python/cuml/common/base.pyx index 767f86a2e3..0174c99966 100644 --- a/python/cuml/common/base.pyx +++ b/python/cuml/common/base.pyx @@ -23,6 +23,7 @@ import cuml import cuml.common.handle import cuml.common.cuda import inspect +import cuml.common.logger as logger from cudf.core import Series, DataFrame from cuml.common.array import CumlArray @@ -162,8 +163,8 @@ class Base: del base # optional! """ - def __init__(self, handle=None, verbose=False, - verbosity=cuml.common.logger.LEVEL_INFO, output_type=None): + def __init__(self, handle=None, verbose=False, verbosity=logger.LEVEL_INFO, + output_type=None): """ Constructor. All children must call init method of this base class. 
From db449151205ae05717da2b590d3a81f38d19d711 Mon Sep 17 00:00:00 2001 From: Thejaswi Rao Date: Mon, 13 Apr 2020 20:55:11 -0700 Subject: [PATCH 165/330] ENH renamed Base.logging_level to a more apt Base.verbosity --- python/cuml/common/base.pyx | 2 +- python/cuml/ensemble/randomforestclassifier.pyx | 16 ++++++++-------- python/cuml/ensemble/randomforestregressor.pyx | 12 ++++++------ 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/python/cuml/common/base.pyx b/python/cuml/common/base.pyx index 0174c99966..bd216ea479 100644 --- a/python/cuml/common/base.pyx +++ b/python/cuml/common/base.pyx @@ -178,7 +178,7 @@ class Base: # integer logging-level argument, remove `self.verbose` and have all # algos in python layer accept an integer logging level instead of # the current boolean param - self.logging_level = verbosity + self.verbosity = verbosity self.output_type = cuml.global_output_type if output_type is None \ else _check_output_type_str(output_type) diff --git a/python/cuml/ensemble/randomforestclassifier.pyx b/python/cuml/ensemble/randomforestclassifier.pyx index b5c6795683..168524c26e 100644 --- a/python/cuml/ensemble/randomforestclassifier.pyx +++ b/python/cuml/ensemble/randomforestclassifier.pyx @@ -617,7 +617,7 @@ class RandomForestClassifier(Base): y_ptr, num_unique_labels, rf_params, - self.logging_level) + self.verbosity) elif self.dtype == np.float64: rf_params64 = rf_params @@ -629,7 +629,7 @@ class RandomForestClassifier(Base): y_ptr, num_unique_labels, rf_params64, - self.logging_level) + self.verbosity) else: raise TypeError("supports only np.float32 and np.float64 input," @@ -719,7 +719,7 @@ class RandomForestClassifier(Base): n_rows, n_cols, preds_ptr, - self.logging_level) + self.verbosity) elif self.dtype == np.float64: predict(handle_[0], @@ -728,7 +728,7 @@ class RandomForestClassifier(Base): n_rows, n_cols, preds_ptr, - self.logging_level) + self.verbosity) else: raise TypeError("supports only np.float32 and np.float64 input," " 
but input of type '%s' passed." @@ -869,7 +869,7 @@ class RandomForestClassifier(Base): n_rows, n_cols, preds_ptr, - self.logging_level) + self.verbosity) elif self.dtype == np.float64: predictGetAll(handle_[0], @@ -878,7 +878,7 @@ class RandomForestClassifier(Base): n_rows, n_cols, preds_ptr, - self.logging_level) + self.verbosity) else: raise TypeError("supports only np.float32 and np.float64 input," " but input of type '%s' passed." @@ -1064,14 +1064,14 @@ class RandomForestClassifier(Base): y_ptr, n_rows, preds_ptr, - self.logging_level) + self.verbosity) elif self.dtype == np.float64: self.stats = score(handle_[0], rf_forest64, y_ptr, n_rows, preds_ptr, - self.logging_level) + self.verbosity) else: raise TypeError("supports only np.float32 and np.float64 input," " but input of type '%s' passed." diff --git a/python/cuml/ensemble/randomforestregressor.pyx b/python/cuml/ensemble/randomforestregressor.pyx index e388a4f4b8..4dffbed138 100644 --- a/python/cuml/ensemble/randomforestregressor.pyx +++ b/python/cuml/ensemble/randomforestregressor.pyx @@ -573,7 +573,7 @@ class RandomForestRegressor(Base): self.n_cols, y_ptr, rf_params, - self.logging_level) + self.verbosity) else: rf_params64 = rf_params @@ -584,7 +584,7 @@ class RandomForestRegressor(Base): self.n_cols, y_ptr, rf_params64, - self.logging_level) + self.verbosity) # make sure that the `fit` is complete before the following delete # call happens self.handle.sync() @@ -665,7 +665,7 @@ class RandomForestRegressor(Base): n_rows, n_cols, preds_ptr, - self.logging_level) + self.verbosity) elif self.dtype == np.float64: predict(handle_[0], @@ -674,7 +674,7 @@ class RandomForestRegressor(Base): n_rows, n_cols, preds_ptr, - self.logging_level) + self.verbosity) else: raise TypeError("supports only float32 and float64 input," " but input of type '%s' passed." 
@@ -830,7 +830,7 @@ class RandomForestRegressor(Base): y_ptr, n_rows, preds_ptr, - self.logging_level) + self.verbosity) elif self.dtype == np.float64: self.temp_stats = score(handle_[0], @@ -838,7 +838,7 @@ class RandomForestRegressor(Base): y_ptr, n_rows, preds_ptr, - self.logging_level) + self.verbosity) if self.accuracy_metric == 'median_ae': stats = self.temp_stats['median_abs_error'] From 03c88fb4985c9bd7847e7d415b553f9980aaeeb8 Mon Sep 17 00:00:00 2001 From: Thejaswi Rao Date: Mon, 13 Apr 2020 20:56:28 -0700 Subject: [PATCH 166/330] ENH updated kmeans to use verbosity instead of logging_level --- python/cuml/cluster/dbscan.pyx | 8 ++++---- python/cuml/cluster/kmeans.pyx | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/python/cuml/cluster/dbscan.pyx b/python/cuml/cluster/dbscan.pyx index 43fe54a73b..e119c02ebe 100644 --- a/python/cuml/cluster/dbscan.pyx +++ b/python/cuml/cluster/dbscan.pyx @@ -235,7 +235,7 @@ class DBSCAN(Base): self.min_samples, labels_ptr, self.max_mbytes_per_batch, - self.logging_level) + self.verbosity) else: dbscanFit(handle_[0], input_ptr, @@ -245,7 +245,7 @@ class DBSCAN(Base): self.min_samples, labels_ptr, self.max_mbytes_per_batch, - self.logging_level) + self.verbosity) else: if out_dtype is "int32" or out_dtype is np.int32: @@ -257,7 +257,7 @@ class DBSCAN(Base): self.min_samples, labels_ptr, self.max_mbytes_per_batch, - self.logging_level) + self.verbosity) else: dbscanFit(handle_[0], input_ptr, @@ -267,7 +267,7 @@ class DBSCAN(Base): self.min_samples, labels_ptr, self.max_mbytes_per_batch, - self.logging_level) + self.verbosity) # make sure that the `dbscanFit` is complete before the following # delete call happens diff --git a/python/cuml/cluster/kmeans.pyx b/python/cuml/cluster/kmeans.pyx index 990637063d..50379a4264 100644 --- a/python/cuml/cluster/kmeans.pyx +++ b/python/cuml/cluster/kmeans.pyx @@ -310,7 +310,7 @@ class KMeans(Base): params.max_iter = self.max_iter params.tol = self.tol - params.verbosity 
= self.logging_level + params.verbosity = self.verbosity params.seed = self.random_state params.metric = 0 # distance metric as squared L2: @todo - support other metrics # noqa: E501 params.batch_samples=self.max_samples_per_batch From 4dc7819d6bdbfc848084aa681bde15cd5b8e6336 Mon Sep 17 00:00:00 2001 From: Thejaswi Rao Date: Mon, 13 Apr 2020 20:57:45 -0700 Subject: [PATCH 167/330] ENH updated umap to use verbosity instead of logging_level --- python/cuml/manifold/umap.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cuml/manifold/umap.pyx b/python/cuml/manifold/umap.pyx index 6fbead4579..f62d99c7d8 100644 --- a/python/cuml/manifold/umap.pyx +++ b/python/cuml/manifold/umap.pyx @@ -369,7 +369,7 @@ class UMAP(Base): umap_params.repulsion_strength = cls.repulsion_strength umap_params.negative_sample_rate = cls.negative_sample_rate umap_params.transform_queue_size = cls.transform_queue_size - umap_params.verbosity = cls.logging_level + umap_params.verbosity = cls.verbosity umap_params.a = cls.a umap_params.b = cls.b if cls.init == "spectral": From 2a49d9bfc3304d5c0b91406485cf8b37e0320789 Mon Sep 17 00:00:00 2001 From: Thejaswi Rao Date: Mon, 13 Apr 2020 20:57:54 -0700 Subject: [PATCH 168/330] ENH updated tsne to use verbosity instead of logging_level --- python/cuml/manifold/t_sne.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cuml/manifold/t_sne.pyx b/python/cuml/manifold/t_sne.pyx index 165d0ac550..8a7a9b2752 100644 --- a/python/cuml/manifold/t_sne.pyx +++ b/python/cuml/manifold/t_sne.pyx @@ -403,7 +403,7 @@ class TSNE(Base): self.pre_momentum, self.post_momentum, seed, - self.logging_level, + self.verbosity, True, (self.method == 'barnes_hut')) From 52ab52afe06f8fed01f809ff33d8af5bba351ded Mon Sep 17 00:00:00 2001 From: Thejaswi Rao Date: Mon, 13 Apr 2020 20:58:44 -0700 Subject: [PATCH 169/330] ENH updated svm to use verbosity instead of logging_level --- python/cuml/svm/svm_base.pyx | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/python/cuml/svm/svm_base.pyx b/python/cuml/svm/svm_base.pyx index 46eba023b3..70ec31a7e5 100644 --- a/python/cuml/svm/svm_base.pyx +++ b/python/cuml/svm/svm_base.pyx @@ -313,7 +313,7 @@ class SVMBase(Base): param.max_iter = self.max_iter param.nochange_steps = self.nochange_steps param.tol = self.tol - param.verbosity = self.logging_level + param.verbosity = self.verbosity param.epsilon = self.epsilon param.svmType = self.svmType return param From 11db4ecd8b788bc1ca5d2e0e18bc0e90a8f6902f Mon Sep 17 00:00:00 2001 From: divyegala Date: Sat, 11 Apr 2020 00:01:17 -0500 Subject: [PATCH 170/330] Working MG, weird bug in SG --- python/cuml/dask/datasets/classification.py | 79 ++++++++++++++------- python/cuml/datasets/classification.py | 47 +++++++----- python/cuml/datasets/utils.py | 3 +- 3 files changed, 84 insertions(+), 45 deletions(-) diff --git a/python/cuml/dask/datasets/classification.py b/python/cuml/dask/datasets/classification.py index bd3134744a..e57c9330d5 100644 --- a/python/cuml/dask/datasets/classification.py +++ b/python/cuml/dask/datasets/classification.py @@ -14,17 +14,27 @@ # from cuml.datasets.classification import _generate_hypercube -from cuml.datasets.classification import make_classification as sg_make_classification +from cuml.datasets.classification import make_classification \ + as sg_make_classification from cuml.datasets.utils import _create_rs_generator from cuml.utils import with_cupy_rmm -from dask.distributed import default_client, wait +from dask.distributed import default_client +import dask.array as da +import dask.delayed import cupy as cp import numpy as np import math -from time import sleep + +def get_X(t): + return t[0] + + +def get_labels(t): + return t[1] + def _create_covariance(*args, rs, dtype='float32'): return 2 * rs.rand(*args, dtype=dtype) - 1 @@ -36,7 +46,7 @@ def make_classification(n_samples=100, n_features=20, n_informative=2, n_clusters_per_class=2, weights=None, flip_y=0.01, 
class_sep=1.0, hypercube=True, shift=0.0, scale=1.0, shuffle=True, random_state=None, order='F', - dtype='float32', n_parts=None): + dtype='float32', n_parts=None, client=None): """Generate a random n-class classification problem. This initially creates clusters of points normally distributed (std=1) about vertices of an ``n_informative``-dimensional hypercube with sides of @@ -132,7 +142,7 @@ def make_classification(n_samples=100, n_features=20, n_informative=2, selection benchmark", 2003. """ - client = default_client() + client = default_client() if client is None else client rs = _create_rs_generator(random_state) @@ -140,29 +150,31 @@ def make_classification(n_samples=100, n_features=20, n_informative=2, n_parts = n_parts if n_parts is not None else len(workers) parts_workers = (workers * n_parts)[:n_parts] - print(parts_workers) rows_per_part = math.ceil(n_samples / n_parts) n_clusters = n_classes * n_clusters_per_class # create centroids - centroids = _generate_hypercube(n_clusters, n_informative, rs, dtype) + centroids = cp.array(_generate_hypercube(n_clusters, n_informative, + rs)).astype(dtype, copy=False) # # create covariance matrices - informative_covariance_local = rs.rand(n_clusters, n_informative, n_informative, dtype=dtype) - informative_covariance = client.scatter(informative_covariance_local, workers=workers) + informative_covariance_local = rs.rand(n_clusters, n_informative, + n_informative, dtype=dtype) + informative_covariance = client.scatter(informative_covariance_local, + workers=workers) del informative_covariance_local - redundant_covariance_local = rs.rand(n_informative, n_redundant, dtype=dtype) - redundant_covariance = client.scatter(redundant_covariance_local, workers=workers) + redundant_covariance_local = rs.rand(n_informative, n_redundant, + dtype=dtype) + redundant_covariance = client.scatter(redundant_covariance_local, + workers=workers) del redundant_covariance_local - wait([informative_covariance, redundant_covariance]) - 
print(client.has_what()) - # repeated indices n = n_informative + n_redundant - repeated_indices = ((n - 1) * rs.rand(n_repeated, dtype=dtype) + 0.5).astype(np.intp) + repeated_indices = ((n - 1) * rs.rand(n_repeated, dtype=dtype) + + 0.5).astype(np.intp) # scale and shift if shift is None: @@ -182,15 +194,30 @@ def make_classification(n_samples=100, n_features=20, n_informative=2, else: worker_rows.append((int(n_samples) - rows_so_far)) - print(parts_workers) parts = [client.submit(sg_make_classification, worker_rows[i], n_features, - n_informative, n_redundant, n_repeated, n_classes, - n_clusters_per_class, weights, flip_y, - class_sep, hypercube, shift, scale, - shuffle, random_state, order, dtype, - centroids, informative_covariance, redundant_covariance, - repeated_indices, - pure=False, - workers=[parts_workers[i]]) for i in range(len(parts_workers))] - - wait(parts) \ No newline at end of file + n_informative, n_redundant, n_repeated, n_classes, + n_clusters_per_class, weights, flip_y, class_sep, + hypercube, shift, scale, shuffle, random_state, + order, dtype, centroids, informative_covariance, + redundant_covariance, repeated_indices, + pure=False, workers=[parts_workers[i]]) + for i in range(len(parts_workers))] + + X_parts = [client.submit(get_X, f, pure=False) + for idx, f in enumerate(parts)] + y_parts = [client.submit(get_labels, f, pure=False) + for idx, f in enumerate(parts)] + + X_dela = [da.from_delayed(dask.delayed(Xp), + shape=(worker_rows[idx], n_features), + dtype=dtype, meta=cp.zeros(1, dtype=dtype)) + for idx, Xp in enumerate(X_parts)] + y_dela = [da.from_delayed(dask.delayed(yp), + shape=(worker_rows[idx], ), dtype=dtype, + meta=cp.zeros((1))) + for idx, yp in enumerate(y_parts)] + + X = da.concatenate([Xd for Xd in X_dela], axis=0) + y = da.concatenate([yd for yd in y_dela], axis=0) + + return X, y diff --git a/python/cuml/datasets/classification.py b/python/cuml/datasets/classification.py index e0e97fa5e7..c974a3d2a3 100644 --- 
a/python/cuml/datasets/classification.py +++ b/python/cuml/datasets/classification.py @@ -19,16 +19,17 @@ import cupy as cp import numpy as np -def _generate_hypercube(samples, dimensions, rng, dtype): + +def _generate_hypercube(samples, dimensions, rng): """Returns distinct binary samples of length dimensions """ if dimensions > 30: return np.hstack([rng.randint(2, size=(samples, dimensions - 30)), _generate_hypercube(samples, 30, rng)]) out = np.random.choice(2 ** dimensions, samples, - replace=False).astype(dtype='>u4', copy=False) + replace=False).astype(dtype='>u4', copy=False) out = np.unpackbits(out.view('>u1')).reshape((-1, 32))[:, -dimensions:] - return cp.array(out).astype(dtype, copy=False) + return out @with_cupy_rmm @@ -39,7 +40,8 @@ def make_classification(n_samples=100, n_features=20, n_informative=2, shuffle=True, random_state=None, order='F', dtype='float32', _centroids=None, _informative_covariance=None, - _redundant_covariance=None, _repeated_indices=None): + _redundant_covariance=None, + _repeated_indices=None): """Generate a random n-class classification problem. 
This initially creates clusters of points normally distributed (std=1) about vertices of an ``n_informative``-dimensional hypercube with sides of @@ -158,7 +160,6 @@ def make_classification(n_samples=100, n_features=20, n_informative=2, else: weights = [1.0 / n_classes] * n_classes - n_useless = n_features - n_informative - n_redundant - n_repeated n_clusters = n_classes * n_clusters_per_class # Distribute samples among clusters by weight @@ -177,32 +178,39 @@ def make_classification(n_samples=100, n_features=20, n_informative=2, # Build the polytope whose vertices become cluster centroids if _centroids is None: - centroids = _generate_hypercube(n_clusters, n_informative, - generator, dtype) + centroids = cp.array(_generate_hypercube(n_clusters, n_informative, + generator)).astype(dtype, copy=False) else: centroids = _centroids - centroids *= 2 * class_sep centroids -= class_sep if not hypercube: - centroids *= generator.rand(samples, 1, dtype=dtype) - centroids *= generator.rand(1, dimensions, dtype=dtype) + centroids *= generator.rand(n_clusters, 1, dtype=dtype) + centroids *= generator.rand(1, n_informative, dtype=dtype) + + # Initially draw informative features from the standard normal # Create each cluster; a variant of make_blobs if shuffle: - proba_samples_per_cluster = np.array(n_samples_per_cluster) / np.sum(n_samples_per_cluster) - shuffled_sample_indices = cp.array(np.random.choice(n_clusters, n_samples, replace=True, - p=proba_samples_per_cluster)) + proba_samples_per_cluster = np.array(n_samples_per_cluster) / np.sum( + n_samples_per_cluster) + shuffled_sample_indices = cp.array(np.random.choice( + n_clusters, + n_samples, + replace=True, + p=proba_samples_per_cluster + )) for k, centroid in enumerate(centroids): centroid_indices = cp.where(shuffled_sample_indices == k) y[centroid_indices[0]] = k % n_classes - X_k = X[centroid_indices[0], :n_informative] # slice a view of the cluster + X_k = X[centroid_indices[0], :n_informative] if 
_informative_covariance is None: - A = 2 * generator.rand(n_informative, n_informative, dtype=dtype) - 1 + A = 2 * generator.rand(n_informative, n_informative, + dtype=dtype) - 1 else: A = _informative_covariance[k] - X_k = cp.dot(X_k, A) # introduce random covariance + X_k[...] = cp.dot(X_k, A) # introduce random covariance X_k += centroid # shift the cluster to a vertex else: @@ -213,7 +221,8 @@ def make_classification(n_samples=100, n_features=20, n_informative=2, X_k = X[start:stop, :n_informative] # slice a view of the cluster if _informative_covariance is None: - A = 2 * generator.rand(n_informative, n_informative, dtype=dtype) - 1 + A = 2 * generator.rand(n_informative, n_informative, + dtype=dtype) - 1 else: A = _informative_covariance[k] X_k[...] = cp.dot(X_k, A) # introduce random covariance @@ -233,7 +242,9 @@ def make_classification(n_samples=100, n_features=20, n_informative=2, if n_repeated > 0: n = n_informative + n_redundant if _repeated_indices is None: - indices = ((n - 1) * generator.rand(n_repeated, dtype=dtype) + 0.5).astype(np.intp) + indices = ((n - 1) * generator.rand(n_repeated, + dtype=dtype) + + 0.5).astype(np.intp) else: indices = _repeated_indices X[:, n:n + n_repeated] = X[:, indices] diff --git a/python/cuml/datasets/utils.py b/python/cuml/datasets/utils.py index bf72f04fa6..c8bb5becc7 100644 --- a/python/cuml/datasets/utils.py +++ b/python/cuml/datasets/utils.py @@ -15,6 +15,7 @@ import cupy as cp + def _create_rs_generator(random_state): if hasattr(random_state, '__module__'): rs_type = random_state.__module__ + '.' 
+ type(random_state).__name__ @@ -28,4 +29,4 @@ def _create_rs_generator(random_state): rs = rs_type else: raise ValueError('random_state type must be int or CuPy RandomState') - return rs \ No newline at end of file + return rs From d7a4eb640ae7fa0f5ba2b960fd74ca7cf5fcc57e Mon Sep 17 00:00:00 2001 From: divyegala Date: Mon, 13 Apr 2020 23:27:18 -0500 Subject: [PATCH 171/330] pytests --- python/cuml/test/dask/test_datasets.py | 36 ++++++++++++++++ python/cuml/test/test_make_classification.py | 44 ++++++++++++++++++++ 2 files changed, 80 insertions(+) create mode 100644 python/cuml/test/test_make_classification.py diff --git a/python/cuml/test/dask/test_datasets.py b/python/cuml/test/dask/test_datasets.py index 08170a3f94..ab52923102 100644 --- a/python/cuml/test/dask/test_datasets.py +++ b/python/cuml/test/dask/test_datasets.py @@ -154,3 +154,39 @@ def test_make_regression(n_samples, n_features, n_informative, finally: c.close() + + +@pytest.mark.parametrize('n_samples', [1000]) +@pytest.mark.parametrize('n_features', [100]) +@pytest.mark.parametrize('n_classes', [2, 4]) +@pytest.mark.parametrize('n_clusters_per_class', [2, 4]) +@pytest.mark.parametrize('n_informative', [7]) +@pytest.mark.parametrize('random_state', [None, 1234]) +@pytest.mark.parametrize('n_parts', [2, 23]) +@pytest.mark.parametrize('order', ['C', 'F']) +def test_make_classification(n_samples, n_features, n_classes, + n_clusters_per_class, n_informative, + random_state, n_parts, order, cluster): + client = Client(cluster) + try: + from cuml.dask.datasets.classification import make_classification + + X, y = make_classification(n_samples=n_samples, n_features=n_features, + n_classes=n_classes, + n_clusters_per_class=n_clusters_per_class, + n_informative=n_informative, + random_state=random_state, n_parts=n_parts, + order=order) + assert(len(X.chunks[0])) == n_parts + assert(len(X.chunks[1])) == 1 + + X_local = X.compute() + y_local = y.compute() + + assert X_local.shape == (n_samples, n_features) + 
import cupy as cp + assert len(cp.unique(y_local)) == n_classes + assert y_local.shape == (n_samples, ) + + finally: + client.close() diff --git a/python/cuml/test/test_make_classification.py b/python/cuml/test/test_make_classification.py new file mode 100644 index 0000000000..8d10cca72b --- /dev/null +++ b/python/cuml/test/test_make_classification.py @@ -0,0 +1,44 @@ +# +# Copyright (c) 2020, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import pytest +from cuml.datasets.classification import make_classification + +@pytest.mark.parametrize('n_samples', [1000]) +@pytest.mark.parametrize('n_features', [100]) +@pytest.mark.parametrize('n_classes', [2, 4]) +@pytest.mark.parametrize('n_clusters_per_class', [2, 4]) +@pytest.mark.parametrize('n_informative', [7]) +@pytest.mark.parametrize('random_state', [None, 1234]) +@pytest.mark.parametrize('order', ['C', 'F']) +def test_make_classification(n_samples, n_features, n_classes, + n_clusters_per_class, n_informative, + random_state, order): + + X, y = make_classification(n_samples=n_samples, n_features=n_features, + n_classes=n_classes, + n_clusters_per_class=n_clusters_per_class, + n_informative=n_informative, + random_state=random_state, order=order) + + assert X.shape == (n_samples, n_features) + import cupy as cp + assert len(cp.unique(y)) == n_classes + assert y.shape == (n_samples, ) + if order == 'F': + assert X.flags['F_CONTIGUOUS'] == True + elif order == 'C': + assert 
X.flags['C_CONTIGUOUS'] == True From fcc31a5f3bb48b5aa549c8b0b15d5cd5cd1ed128 Mon Sep 17 00:00:00 2001 From: divyegala Date: Mon, 13 Apr 2020 23:40:43 -0500 Subject: [PATCH 172/330] changelog and style fix --- CHANGELOG.md | 1 + python/cuml/test/test_make_classification.py | 13 +++++++------ 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5481f6b395..82cda6b57a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ ## New Features - PR #1867: C++: add logging interface support in cuML based spdlog - PR #1906: UMAP MNMG +- PR #2074: SG and MNMG `make_classification` ## Improvements - PR #1931: C++: enabled doxygen docs for all of the C++ codebase diff --git a/python/cuml/test/test_make_classification.py b/python/cuml/test/test_make_classification.py index 8d10cca72b..1617e5a48f 100644 --- a/python/cuml/test/test_make_classification.py +++ b/python/cuml/test/test_make_classification.py @@ -17,6 +17,7 @@ import pytest from cuml.datasets.classification import make_classification + @pytest.mark.parametrize('n_samples', [1000]) @pytest.mark.parametrize('n_features', [100]) @pytest.mark.parametrize('n_classes', [2, 4]) @@ -29,16 +30,16 @@ def test_make_classification(n_samples, n_features, n_classes, random_state, order): X, y = make_classification(n_samples=n_samples, n_features=n_features, - n_classes=n_classes, - n_clusters_per_class=n_clusters_per_class, - n_informative=n_informative, - random_state=random_state, order=order) + n_classes=n_classes, + n_clusters_per_class=n_clusters_per_class, + n_informative=n_informative, + random_state=random_state, order=order) assert X.shape == (n_samples, n_features) import cupy as cp assert len(cp.unique(y)) == n_classes assert y.shape == (n_samples, ) if order == 'F': - assert X.flags['F_CONTIGUOUS'] == True + assert X.flags['F_CONTIGUOUS'] elif order == 'C': - assert X.flags['C_CONTIGUOUS'] == True + assert X.flags['C_CONTIGUOUS'] From 
aaf87dc94b21e6014964c8b6cb419c8bedccd1b7 Mon Sep 17 00:00:00 2001 From: divyegala Date: Mon, 13 Apr 2020 23:45:08 -0500 Subject: [PATCH 173/330] remove unused import --- python/cuml/dask/neighbors/nearest_neighbors.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/python/cuml/dask/neighbors/nearest_neighbors.py b/python/cuml/dask/neighbors/nearest_neighbors.py index 49a86642d3..5014ae5a2b 100644 --- a/python/cuml/dask/neighbors/nearest_neighbors.py +++ b/python/cuml/dask/neighbors/nearest_neighbors.py @@ -24,8 +24,6 @@ from cuml.dask.common.input_utils import to_output from cuml.dask.common.input_utils import DistributedDataHandler -from cuml.dask.common.part_utils import _extract_partitions - from uuid import uuid1 From 23a407f68f83a7aca6a8641339e7c000e8960c62 Mon Sep 17 00:00:00 2001 From: wxbn Date: Tue, 14 Apr 2020 09:57:38 +0000 Subject: [PATCH 174/330] sparse test functions --- python/cuml/test/test_random_forest.py | 30 ++++++-------------------- 1 file changed, 6 insertions(+), 24 deletions(-) diff --git a/python/cuml/test/test_random_forest.py b/python/cuml/test/test_random_forest.py index a71d25d096..14df74e4c7 100644 --- a/python/cuml/test/test_random_forest.py +++ b/python/cuml/test/test_random_forest.py @@ -109,7 +109,6 @@ def test_rf_regression(datatype, split_algo, mode, column_info, X, y = make_regression(n_samples=500, n_features=ncols, n_informative=n_info, random_state=123) - elif mode == 'quality': X, y = fetch_california_housing(return_X_y=True) @@ -449,17 +448,12 @@ def test_rf_classification_multi_class(datatype, column_info, nrows, @pytest.mark.parametrize('column_info', [unit_param([20, 10]), quality_param([200, 100]), stress_param([500, 350])]) -@pytest.mark.parametrize('rows_sample', [unit_param(1.0), quality_param(0.90), - stress_param(0.95)]) @pytest.mark.parametrize('datatype', [np.float32]) -@pytest.mark.parametrize('split_algo', [0, 1]) -@pytest.mark.parametrize('max_features', [1.0, 'auto', 'log2', 'sqrt']) 
@pytest.mark.parametrize('fil_sparse_format', ['not_supported', True, 'auto', False]) @pytest.mark.parametrize('algo', ['auto', 'naive', 'tree_reorg', 'batch_tree_reorg']) -def test_rf_classification_sparse(datatype, split_algo, rows_sample, - nrows, column_info, max_features, +def test_rf_classification_sparse(datatype, nrows, column_info, fil_sparse_format, algo): use_handle = True ncols, n_info = column_info @@ -477,8 +471,7 @@ def test_rf_classification_sparse(datatype, split_algo, rows_sample, # Initialize, fit and predict using cuML's # random forest classification model - cuml_model = curfc(max_features=max_features, rows_sample=rows_sample, - n_bins=16, split_algo=split_algo, split_criterion=0, + cuml_model = curfc(n_bins=16, split_criterion=0, min_rows_per_node=2, seed=123, n_streams=1, n_estimators=num_treees, handle=handle, max_leaves=-1, max_depth=40) @@ -521,7 +514,7 @@ def test_rf_classification_sparse(datatype, split_algo, rows_sample, if nrows < 500000: sk_model = skrfc(n_estimators=50, max_depth=40, - min_samples_split=2, max_features=max_features, + min_samples_split=2, random_state=10) sk_model.fit(X_train, y_train) sk_preds = sk_model.predict(X_test) @@ -536,22 +529,13 @@ def test_rf_classification_sparse(datatype, split_algo, rows_sample, @pytest.mark.parametrize('column_info', [unit_param([20, 10]), quality_param([200, 50]), stress_param([400, 100])]) -@pytest.mark.parametrize('rows_sample', [unit_param(1.0), quality_param(0.90), - stress_param(0.95)]) @pytest.mark.parametrize('datatype', [np.float32]) -@pytest.mark.parametrize('split_algo', [0, 1]) -@pytest.mark.parametrize('max_features', [1.0, 'auto', 'log2', 'sqrt']) @pytest.mark.parametrize('fil_sparse_format', ['not_supported', True, 'auto', False]) @pytest.mark.parametrize('algo', ['auto', 'naive', 'tree_reorg', 'batch_tree_reorg']) -def test_rf_regression_sparse(datatype, split_algo, mode, column_info, - max_features, rows_sample, +def test_rf_regression_sparse(datatype, mode, 
column_info, fil_sparse_format, algo): - coverage = 0.6 - if random.random() > coverage: - pytest.skip('Randomly skipping the test') - ncols, n_info = column_info use_handle = True num_treees = 50 @@ -564,7 +548,7 @@ def test_rf_regression_sparse(datatype, split_algo, mode, column_info, X, y = fetch_california_housing(return_X_y=True) else: - X, y = make_regression(n_samples=100000, n_features=ncols, + X, y = make_regression(n_samples=3000, n_features=ncols, n_informative=n_info, random_state=123) X = X.astype(datatype) @@ -576,8 +560,7 @@ def test_rf_regression_sparse(datatype, split_algo, mode, column_info, handle, stream = get_handle(use_handle, n_streams=1) # Initialize and fit using cuML's random forest regression model - cuml_model = curfr(max_features=max_features, rows_sample=rows_sample, - n_bins=16, split_algo=split_algo, split_criterion=2, + cuml_model = curfr(n_bins=16, split_criterion=2, min_rows_per_node=2, seed=123, n_streams=1, n_estimators=num_treees, handle=handle, max_leaves=-1, max_depth=40, accuracy_metric='mse') @@ -619,7 +602,6 @@ def test_rf_regression_sparse(datatype, split_algo, mode, column_info, if mode != "stress": sk_model = skrfr(n_estimators=50, max_depth=40, min_samples_split=2, - max_features=max_features, random_state=10) sk_model.fit(X_train, y_train) sk_preds = sk_model.predict(X_test) From 28069350db49bfb034afe1126f625c3ff8c87ad0 Mon Sep 17 00:00:00 2001 From: Salonijain27 Date: Tue, 14 Apr 2020 07:07:49 -0500 Subject: [PATCH 175/330] Update test_pca.py --- python/cuml/test/dask/test_pca.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cuml/test/dask/test_pca.py b/python/cuml/test/dask/test_pca.py index ddf8523b3c..388464e2ed 100644 --- a/python/cuml/test/dask/test_pca.py +++ b/python/cuml/test/dask/test_pca.py @@ -35,7 +35,7 @@ def test_pca_fit(nrows, ncols, n_parts, cluster): from cuml.dask.datasets import make_blobs X_cudf, _ = make_blobs(nrows, ncols, 1, n_parts, - cluster_std=1.5, verbose=False, 
+ cluster_std=0.5, verbose=False, random_state=10, dtype=np.float32) wait(X_cudf) From c5fba9339c2b5c52e00c69b2646befcd425020c6 Mon Sep 17 00:00:00 2001 From: salonijain27 Date: Tue, 14 Apr 2020 08:44:19 -0500 Subject: [PATCH 176/330] updated the changelog --- CHANGELOG.md | 1 + python/cuml/test/dask/test_tsvd.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f44646f2c4..e6221ada72 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ - PR #1996: Cythonize in parallel - PR #2031: Encapsulating UCX-py interactions in singleton - PR #2029: Add C++ ARIMA log-likelihood benchmark +- PR #2051: Reduce the time required to run dask pca and dask tsvd tests ## Bug Fixes - PR #1939: Fix syntax error in cuml.common.array diff --git a/python/cuml/test/dask/test_tsvd.py b/python/cuml/test/dask/test_tsvd.py index 84d1d372f3..a7234cd1e6 100644 --- a/python/cuml/test/dask/test_tsvd.py +++ b/python/cuml/test/dask/test_tsvd.py @@ -39,7 +39,7 @@ def test_pca_fit(nrows, ncols, n_parts, cluster): from cuml.dask.datasets import make_blobs X_cudf, _ = make_blobs(nrows, ncols, 1, n_parts, - cluster_std=1.5, verbose=False, + cluster_std=0.5, verbose=False, random_state=10, dtype=np.float32) wait(X_cudf) From 6defb074ee264bc8374af934dc4896bab75e1c3a Mon Sep 17 00:00:00 2001 From: salonijain27 Date: Tue, 14 Apr 2020 09:09:19 -0500 Subject: [PATCH 177/330] updated dask tsvd test file --- python/cuml/test/dask/test_tsvd.py | 31 ++++++++++++------------------ 1 file changed, 12 insertions(+), 19 deletions(-) diff --git a/python/cuml/test/dask/test_tsvd.py b/python/cuml/test/dask/test_tsvd.py index a7234cd1e6..3fff211b7f 100644 --- a/python/cuml/test/dask/test_tsvd.py +++ b/python/cuml/test/dask/test_tsvd.py @@ -22,14 +22,12 @@ @pytest.mark.mg -@pytest.mark.parametrize("nrows", [unit_param(1000), - stress_param(5e6)]) -@pytest.mark.parametrize("ncols", [unit_param(20), - stress_param(1000)]) -@pytest.mark.parametrize("n_parts", 
[unit_param(30)]) -def test_pca_fit(nrows, ncols, n_parts, cluster): +@pytest.mark.parametrize("data_info", [unit_param([1000, 20, 30]), + stress_param([9e6, 5000, 30])]) +def test_pca_fit(data_info, cluster): client = Client(cluster) + nrows, ncols, n_parts = data_info try: @@ -71,15 +69,12 @@ def test_pca_fit(nrows, ncols, n_parts, cluster): @pytest.mark.mg -@pytest.mark.parametrize("nrows", [unit_param(1000), - stress_param(9e6)]) -@pytest.mark.parametrize("ncols", [unit_param(20), - stress_param(5000)]) -@pytest.mark.parametrize("n_parts", [46]) -def test_pca_fit_transform_fp32(nrows, ncols, n_parts, cluster): +@pytest.mark.parametrize("data_info", [unit_param([1000, 20, 46]), + stress_param([9e6, 5000, 46])]) +def test_pca_fit_transform_fp32(data_info, cluster): client = Client(cluster) - + nrows, ncols, n_parts = data_info try: from cuml.dask.decomposition import TruncatedSVD as daskTPCA from cuml.dask.datasets import make_blobs @@ -98,14 +93,12 @@ def test_pca_fit_transform_fp32(nrows, ncols, n_parts, cluster): @pytest.mark.mg -@pytest.mark.parametrize("nrows", [unit_param(1000), - stress_param(9e6)]) -@pytest.mark.parametrize("ncols", [unit_param(20), - stress_param(5000)]) -@pytest.mark.parametrize("n_parts", [unit_param(33)]) -def test_pca_fit_transform_fp64(nrows, ncols, n_parts, cluster): +@pytest.mark.parametrize("data_info", [unit_param([1000, 20, 33]), + stress_param([9e6, 5000, 33])]) +def test_pca_fit_transform_fp64(data_info, cluster): client = Client(cluster) + nrows, ncols, n_parts = data_info try: from cuml.dask.decomposition import TruncatedSVD as daskTPCA From 9a676e11ec216a3039cfb857c2173a8bf09ffd07 Mon Sep 17 00:00:00 2001 From: "Corey J. 
Nolet" Date: Tue, 14 Apr 2020 12:12:31 -0400 Subject: [PATCH 178/330] Refactoring ucp_helper a bit to make memory management more straightforward --- cpp/comms/std/src/cuML_std_comms_impl.cpp | 37 +-- cpp/comms/std/src/cuML_std_comms_impl.hpp | 31 +-- cpp/comms/std/src/ucp_helper.h | 305 ++++++++++++---------- python/cuml/test/dask/test_comms.py | 2 +- 4 files changed, 186 insertions(+), 189 deletions(-) diff --git a/cpp/comms/std/src/cuML_std_comms_impl.cpp b/cpp/comms/std/src/cuML_std_comms_impl.cpp index ff04e66806..aaa1816ff5 100644 --- a/cpp/comms/std/src/cuML_std_comms_impl.cpp +++ b/cpp/comms/std/src/cuML_std_comms_impl.cpp @@ -209,12 +209,9 @@ cumlStdCommunicator_impl::cumlStdCommunicator_impl( _size(size), _rank(rank), _next_request_id(0), - _ucp_handle(NULL), _verbose(verbose) { initialize(); - - _ucp_handle = (void *)malloc(sizeof(struct comms_ucp_handle)); - init_comms_ucp_handle((struct comms_ucp_handle *)_ucp_handle); + p2p_enabled = true; } #endif @@ -223,10 +220,7 @@ cumlStdCommunicator_impl::cumlStdCommunicator_impl(ncclComm_t comm, int size, : _nccl_comm(comm), _size(size), _rank(rank), - _verbose(verbose), - _ucp_worker(nullptr), - _ucp_handle(nullptr), - _ucp_eps(nullptr) { + _verbose(verbose) { initialize(); } @@ -242,12 +236,6 @@ cumlStdCommunicator_impl::~cumlStdCommunicator_impl() { CUDA_CHECK_NO_THROW(cudaFree(_sendbuff)); CUDA_CHECK_NO_THROW(cudaFree(_recvbuff)); - -#ifdef WITH_UCX - if (_ucp_worker != nullptr) { - close_ucp_handle((struct comms_ucp_handle *)_ucp_handle); - } -#endif } int cumlStdCommunicator_impl::getSize() const { return _size; } @@ -292,6 +280,7 @@ void cumlStdCommunicator_impl::get_request_id(request_t *req) const { void cumlStdCommunicator_impl::isend(const void *buf, int size, int dest, int tag, request_t *request) const { ASSERT(UCX_ENABLED, "cuML Comms not built with UCX support"); + ASSERT(p2p_enabled, "cuML Comms instance was not initialized for point-to-point"); #ifdef WITH_UCX ASSERT(_ucp_worker != nullptr, @@ 
-300,8 +289,9 @@ void cumlStdCommunicator_impl::isend(const void *buf, int size, int dest, get_request_id(request); ucp_ep_h ep_ptr = (*_ucp_eps)[dest]; - struct ucp_request *ucp_req = - ucp_isend((struct comms_ucp_handle *)_ucp_handle, ep_ptr, buf, size, tag, + ucp_request *ucp_req = (ucp_request *)malloc(sizeof(ucp_request)); + + this->_ucp_handler.ucp_isend(ucp_req, ep_ptr, buf, size, tag, default_tag_mask, getRank(), _verbose); CUML_LOG_DEBUG( @@ -316,6 +306,7 @@ void cumlStdCommunicator_impl::isend(const void *buf, int size, int dest, void cumlStdCommunicator_impl::irecv(void *buf, int size, int source, int tag, request_t *request) const { ASSERT(UCX_ENABLED, "cuML Comms not built with UCX support"); + ASSERT(p2p_enabled, "cuML Comms instance was not initialized for point-to-point"); #ifdef WITH_UCX ASSERT(_ucp_worker != nullptr, @@ -331,8 +322,8 @@ void cumlStdCommunicator_impl::irecv(void *buf, int size, int source, int tag, tag_mask = any_rank_tag_mask; } - struct ucp_request *ucp_req = - ucp_irecv((struct comms_ucp_handle *)_ucp_handle, _ucp_worker, ep_ptr, buf, + ucp_request *ucp_req = (ucp_request *)malloc(sizeof(ucp_request)); + _ucp_handler.ucp_irecv(ucp_req, _ucp_worker, ep_ptr, buf, size, tag, tag_mask, source, _verbose); CUML_LOG_DEBUG( @@ -347,12 +338,13 @@ void cumlStdCommunicator_impl::irecv(void *buf, int size, int source, int tag, void cumlStdCommunicator_impl::waitall(int count, request_t array_of_requests[]) const { ASSERT(UCX_ENABLED, "cuML Comms not built with UCX support"); + ASSERT(p2p_enabled, "cuML Comms instance was not initialized for point-to-point"); #ifdef WITH_UCX ASSERT(_ucp_worker != nullptr, "ERROR: UCX comms not initialized on communicator."); - std::vector requests; + std::vector requests; requests.reserve(count); time_t start = time(NULL); @@ -373,13 +365,12 @@ void cumlStdCommunicator_impl::waitall(int count, // in 10 or more seconds. 
ASSERT(now - start < 10, "Timed out waiting for requests."); - for (std::vector::iterator it = requests.begin(); + for (std::vector::iterator it = requests.begin(); it != requests.end();) { bool restart = false; // resets the timeout when any progress was made // Causes UCP to progress through the send/recv message queue - while (ucp_progress((struct comms_ucp_handle *)_ucp_handle, - _ucp_worker) != 0) { + while (_ucp_handler.ucp_progress(_ucp_worker) != 0) { restart = true; } @@ -409,7 +400,7 @@ void cumlStdCommunicator_impl::waitall(int count, req->other_rank, req->is_send_request, !req->needs_release); // perform cleanup - free_ucp_request((struct comms_ucp_handle *)_ucp_handle, req); + _ucp_handler.free_ucp_request(req); // remove from pending requests it = requests.erase(it); diff --git a/cpp/comms/std/src/cuML_std_comms_impl.hpp b/cpp/comms/std/src/cuML_std_comms_impl.hpp index 048a8bdc28..3b894fac72 100644 --- a/cpp/comms/std/src/cuML_std_comms_impl.hpp +++ b/cpp/comms/std/src/cuML_std_comms_impl.hpp @@ -26,33 +26,11 @@ #ifdef WITH_UCX #include - -/** - * Standard UCX request object that will be passed - * around asynchronously. This object is really - * opaque and the comms layer only cares that it - * has been completed. Because cuml comms do not - * initialize the ucx application context, it doesn't - * own this object and thus it's important not to - * modify this struct. - */ -struct ucx_context { - int completed; -}; - -/** - * The ucp_request struct is owned by cuml comms. It - * wraps the `ucx_context` request and adds a few - * other fields for logging and cleanup. 
- */ -struct ucp_request { - struct ucx_context* req; - bool needs_release = true; - int other_rank = -1; - bool is_send_request = false; -}; +#include "ucp_helper.h" +#include #endif + namespace ML { /** @@ -155,7 +133,8 @@ class cumlStdCommunicator_impl : public MLCommon::cumlCommunicator_iface { void get_request_id(request_t* req) const; #ifdef WITH_UCX - void* _ucp_handle; + bool p2p_enabled = false; + comms_ucp_handler _ucp_handler; ucp_worker_h _ucp_worker; std::shared_ptr _ucp_eps; mutable request_t _next_request_id; diff --git a/cpp/comms/std/src/ucp_helper.h b/cpp/comms/std/src/ucp_helper.h index 86192f013b..c285e83ebc 100644 --- a/cpp/comms/std/src/ucp_helper.h +++ b/cpp/comms/std/src/ucp_helper.h @@ -22,23 +22,35 @@ #include #include +#pragma once + + /** - * An opaque handle for managing `dlopen` state within - * a cuml comms instance. + * Standard UCX request object that will be passed + * around asynchronously. This object is really + * opaque and the comms layer only cares that it + * has been completed. Because cuml comms do not + * initialize the ucx application context, it doesn't + * own this object and thus it's important not to + * modify this struct. */ -struct comms_ucp_handle { - void *ucp_handle; +struct ucx_context { + int completed; +}; - ucs_status_ptr_t (*send_func)(ucp_ep_h, const void *, size_t, ucp_datatype_t, - ucp_tag_t, ucp_send_callback_t); - ucs_status_ptr_t (*recv_func)(ucp_worker_h, void *, size_t count, - ucp_datatype_t datatype, ucp_tag_t, ucp_tag_t, - ucp_tag_recv_callback_t); - void (*print_info_func)(ucp_ep_h, FILE *); - void (*req_free_func)(void *); - int (*worker_progress_func)(ucp_worker_h); +/** + * Wraps the `ucx_context` request and adds a few + * other fields for trace logging and cleanup. 
+ */ +class ucp_request { + public: + struct ucx_context* req; + bool needs_release = true; + int other_rank = -1; + bool is_send_request = false; }; + // by default, match the whole tag static const ucp_tag_t default_tag_mask = -1; @@ -53,7 +65,7 @@ static const int UCP_ANY_RANK = -1; /** * @brief Asynchronous send callback sets request to completed */ -static void send_handle(void *request, ucs_status_t status) { +static void send_callback(void *request, ucs_status_t status) { struct ucx_context *context = (struct ucx_context *)request; context->completed = 1; } @@ -61,158 +73,173 @@ static void send_handle(void *request, ucs_status_t status) { /** * @brief Asynchronous recv callback sets request to completed */ -static void recv_handle(void *request, ucs_status_t status, +static void recv_callback(void *request, ucs_status_t status, ucp_tag_recv_info_t *info) { struct ucx_context *context = (struct ucx_context *)request; context->completed = 1; } -void load_ucp_handle(struct comms_ucp_handle *ucp_handle) { - ucp_handle->ucp_handle = - dlopen("libucp.so", RTLD_LAZY | RTLD_NOLOAD | RTLD_NODELETE); - if (!ucp_handle->ucp_handle) { - ucp_handle->ucp_handle = dlopen("libucp.so", RTLD_LAZY | RTLD_NODELETE); - ASSERT(ucp_handle->ucp_handle, "Cannot open UCX library: %s\n", dlerror()); + +/** + * Helper class for managing `dlopen` state and + * interacting with ucp. 
+ */ +class comms_ucp_handler { +public: + comms_ucp_handler() { + load_ucp_handle(); + load_send_func(); + load_recv_func(); + load_free_req_func(); + load_print_info_func(); + load_worker_progress_func(); } - dlerror(); -} -void close_ucp_handle(struct comms_ucp_handle *handle) { - dlclose(handle->ucp_handle); -} + ~comms_ucp_handler() { + dlclose(ucp_handle); + } -void assert_dlerror() { - char *error = dlerror(); - ASSERT(error == NULL, "Error loading function symbol: %s\n", error); -} +private: + void *ucp_handle; -void load_send_func(struct comms_ucp_handle *ucp_handle) { - ucp_handle->send_func = (ucs_status_ptr_t(*)( - ucp_ep_h, const void *, size_t, ucp_datatype_t, ucp_tag_t, - ucp_send_callback_t))dlsym(ucp_handle->ucp_handle, "ucp_tag_send_nb"); - assert_dlerror(); -} + ucs_status_ptr_t (*send_func)(ucp_ep_h, const void *, size_t, ucp_datatype_t, + ucp_tag_t, ucp_send_callback_t); + ucs_status_ptr_t (*recv_func)(ucp_worker_h, void *, size_t count, + ucp_datatype_t datatype, ucp_tag_t, ucp_tag_t, + ucp_tag_recv_callback_t); + void (*print_info_func)(ucp_ep_h, FILE *); + void (*req_free_func)(void *); + int (*worker_progress_func)(ucp_worker_h); + void load_ucp_handle() { + ucp_handle = + dlopen("libucp.so", RTLD_LAZY | RTLD_NOLOAD | RTLD_NODELETE); + if (!ucp_handle) { + ucp_handle = dlopen("libucp.so", RTLD_LAZY | RTLD_NODELETE); + ASSERT(ucp_handle, "Cannot open UCX library: %s\n", dlerror()); + } + dlerror(); + } -void load_free_req_func(struct comms_ucp_handle *ucp_handle) { - ucp_handle->req_free_func = - (void (*)(void *request))dlsym(ucp_handle->ucp_handle, "ucp_request_free"); - assert_dlerror(); -} + void assert_dlerror() { + char *error = dlerror(); + ASSERT(error == NULL, "Error loading function symbol: %s\n", error); + } -void load_print_info_func(struct comms_ucp_handle *ucp_handle) { - ucp_handle->print_info_func = (void (*)(ucp_ep_h, FILE *))dlsym( - ucp_handle->ucp_handle, "ucp_ep_print_info"); - assert_dlerror(); -} + void 
load_send_func() { + send_func = (ucs_status_ptr_t(*)( + ucp_ep_h, const void *, size_t, ucp_datatype_t, ucp_tag_t, + ucp_send_callback_t))dlsym(ucp_handle, "ucp_tag_send_nb"); + assert_dlerror(); + } -void load_worker_progress_func(struct comms_ucp_handle *ucp_handle) { - ucp_handle->worker_progress_func = - (int (*)(ucp_worker_h))dlsym(ucp_handle->ucp_handle, "ucp_worker_progress"); - assert_dlerror(); -} + void load_free_req_func() { + req_free_func = + (void (*)(void *request))dlsym(ucp_handle, "ucp_request_free"); + assert_dlerror(); + } -void load_recv_func(struct comms_ucp_handle *ucp_handle) { - ucp_handle->recv_func = (ucs_status_ptr_t(*)( - ucp_worker_h, void *, size_t, ucp_datatype_t, ucp_tag_t, ucp_tag_t, - ucp_tag_recv_callback_t))dlsym(ucp_handle->ucp_handle, "ucp_tag_recv_nb"); - assert_dlerror(); -} + void load_print_info_func() { + print_info_func = (void (*)(ucp_ep_h, FILE *))dlsym( + ucp_handle, "ucp_ep_print_info"); + assert_dlerror(); + } -void init_comms_ucp_handle(struct comms_ucp_handle *handle) { - load_ucp_handle(handle); + void load_worker_progress_func() { + worker_progress_func = + (int (*)(ucp_worker_h))dlsym(ucp_handle, "ucp_worker_progress"); + assert_dlerror(); + } - load_send_func(handle); - load_recv_func(handle); - load_free_req_func(handle); - load_print_info_func(handle); - load_worker_progress_func(handle); -} + void load_recv_func() { + recv_func = (ucs_status_ptr_t(*)( + ucp_worker_h, void *, size_t, ucp_datatype_t, ucp_tag_t, ucp_tag_t, + ucp_tag_recv_callback_t))dlsym(ucp_handle, "ucp_tag_recv_nb"); + assert_dlerror(); + } -/** - * @brief Frees any memory underlying the given ucp request object - */ -void free_ucp_request(struct comms_ucp_handle *ucp_handle, - ucp_request *request) { - if (request->needs_release) { - request->req->completed = 0; - (*(ucp_handle->req_free_func))(request->req); + ucp_tag_t build_message_tag(int rank, int tag) const { + // keeping the rank in the lower bits enables debugging. 
+ return ((uint32_t)tag << 31) | (uint32_t)rank; } - free(request); -} -int ucp_progress(struct comms_ucp_handle *ucp_handle, ucp_worker_h worker) { - return (*(ucp_handle->worker_progress_func))(worker); -} -ucp_tag_t build_message_tag(int rank, int tag) { - // keeping the rank in the lower bits enables debugging. - return ((uint32_t)tag << 31) | (uint32_t)rank; -} +public: + int ucp_progress(ucp_worker_h worker) const { + return (*(worker_progress_func))(worker); + } -/** - * @brief Asynchronously send data to the given endpoint using the given tag - */ -struct ucp_request *ucp_isend(struct comms_ucp_handle *ucp_handle, - ucp_ep_h ep_ptr, const void *buf, int size, - int tag, ucp_tag_t tag_mask, int rank, - bool verbose) { - ucp_tag_t ucp_tag = build_message_tag(rank, tag); - - CUML_LOG_DEBUG("Sending tag: %ld", ucp_tag); - - ucs_status_ptr_t send_result = (*(ucp_handle->send_func))( - ep_ptr, buf, size, ucp_dt_make_contig(1), ucp_tag, send_handle); - struct ucx_context *ucp_req = (struct ucx_context *)send_result; - struct ucp_request *req = (struct ucp_request *)malloc(sizeof(ucp_request)); - if (UCS_PTR_IS_ERR(send_result)) { - ASSERT(!UCS_PTR_IS_ERR(send_result), - "unable to send UCX data message (%d)\n", - UCS_PTR_STATUS(send_result)); - /** - * If the request didn't fail, but it's not OK, it is in flight. - * Expect the handler to be invoked + + /** + * @brief Frees any memory underlying the given ucp request object */ - } else if (UCS_PTR_STATUS(send_result) != UCS_OK) { - /** - * If the request is OK, it's already been completed and we don't need to wait on it. - * The request will be a nullptr, however, so we need to create a new request - * and set it to completed to make the "waitall()" function work properly. 
- */ - req->needs_release = true; - } else { - req->needs_release = false; + void free_ucp_request(ucp_request *request) const { + if (request->needs_release) { + request->req->completed = 0; + (*(req_free_func))(request->req); + } + free(request); } - req->other_rank = rank; - req->is_send_request = true; - req->req = ucp_req; - return req; -} + /** + * @brief Asynchronously send data to the given endpoint using the given tag + */ + void ucp_isend(ucp_request *req, ucp_ep_h ep_ptr, + const void *buf, int size, int tag, ucp_tag_t tag_mask, int rank, + bool verbose) const { + ucp_tag_t ucp_tag = build_message_tag(rank, tag); + + CUML_LOG_DEBUG("Sending tag: %ld", ucp_tag); + + ucs_status_ptr_t send_result = (*(send_func))( + ep_ptr, buf, size, ucp_dt_make_contig(1), ucp_tag, send_callback); + struct ucx_context *ucp_req = (struct ucx_context *)send_result; + if (UCS_PTR_IS_ERR(send_result)) { + ASSERT(!UCS_PTR_IS_ERR(send_result), + "unable to send UCX data message (%d)\n", + UCS_PTR_STATUS(send_result)); + /** + * If the request didn't fail, but it's not OK, it is in flight. + * Expect the handler to be invoked + */ + } else if (UCS_PTR_STATUS(send_result) != UCS_OK) { + /** + * If the request is OK, it's already been completed and we don't need to wait on it. + * The request will be a nullptr, however, so we need to create a new request + * and set it to completed to make the "waitall()" function work properly. + */ + req->needs_release = true; + } else { + req->needs_release = false; + } + + req->other_rank = rank; + req->is_send_request = true; + req->req = ucp_req; + } -/** - * @brief Asynchronously receive data from given endpoint with the given tag. 
- */ -struct ucp_request *ucp_irecv(struct comms_ucp_handle *ucp_handle, - ucp_worker_h worker, ucp_ep_h ep_ptr, void *buf, - int size, int tag, ucp_tag_t tag_mask, - int sender_rank, bool verbose) { - ucp_tag_t ucp_tag = build_message_tag(sender_rank, tag); + /** + * @brief Asynchronously receive data from given endpoint with the given tag. + */ + void ucp_irecv(ucp_request *req, + ucp_worker_h worker, ucp_ep_h ep_ptr, void *buf, + int size, int tag, ucp_tag_t tag_mask, + int sender_rank, bool verbose) const { + ucp_tag_t ucp_tag = build_message_tag(sender_rank, tag); - CUML_LOG_DEBUG("%d: Receiving tag: %ld", ucp_tag); + CUML_LOG_DEBUG("%d: Receiving tag: %ld", ucp_tag); - ucs_status_ptr_t recv_result = (*(ucp_handle->recv_func))( - worker, buf, size, ucp_dt_make_contig(1), ucp_tag, tag_mask, recv_handle); - struct ucx_context *ucp_req = (struct ucx_context *)recv_result; + ucs_status_ptr_t recv_result = (*(recv_func))( + worker, buf, size, ucp_dt_make_contig(1), ucp_tag, tag_mask, recv_callback); - struct ucp_request *req = (struct ucp_request *)malloc(sizeof(ucp_request)); + struct ucx_context *ucp_req = (struct ucx_context *)recv_result; - req->req = ucp_req; - req->needs_release = true; - req->is_send_request = false; - req->other_rank = sender_rank; + req->req = ucp_req; + req->needs_release = true; + req->is_send_request = false; + req->other_rank = sender_rank; - ASSERT(!UCS_PTR_IS_ERR(recv_result), - "unable to receive UCX data message (%d)\n", - UCS_PTR_STATUS(recv_result)); - return req; -} + ASSERT(!UCS_PTR_IS_ERR(recv_result), + "unable to receive UCX data message (%d)\n", + UCS_PTR_STATUS(recv_result)); + } +}; diff --git a/python/cuml/test/dask/test_comms.py b/python/cuml/test/dask/test_comms.py index 4871604f86..ec9521c22b 100644 --- a/python/cuml/test/dask/test_comms.py +++ b/python/cuml/test/dask/test_comms.py @@ -115,7 +115,7 @@ def test_allreduce(cluster): @pytest.mark.ucx -@pytest.mark.parametrize("n_trials", [5]) 
+@pytest.mark.parametrize("n_trials", [1, 5]) def test_send_recv(n_trials, ucx_cluster): client = Client(ucx_cluster) From 5c4bd06188a7766f08be657c4fe20acfc7a8fe22 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Tue, 14 Apr 2020 12:15:54 -0400 Subject: [PATCH 179/330] Removing stray debug print --- python/cuml/test/dask/test_umap.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/python/cuml/test/dask/test_umap.py b/python/cuml/test/dask/test_umap.py index dd18b3c95a..4e3d591f0a 100644 --- a/python/cuml/test/dask/test_umap.py +++ b/python/cuml/test/dask/test_umap.py @@ -38,8 +38,6 @@ def test_umap_mnmg(n_parts, sampling_ratio, supervised, dataset, cluster): n_neighbors = 10 - print("Dataset: " + str(dataset)) - if dataset == "make_blobs": local_X, local_y = make_blobs(n_samples=10000, n_features=10, centers=200, cluster_std=0.8, From 72388e506137a3e3b49e98f53d2ef9da23afa564 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Tue, 14 Apr 2020 12:18:31 -0400 Subject: [PATCH 180/330] Updating style for cpp files --- cpp/comms/std/src/cuML_std_comms_impl.cpp | 20 ++++----- cpp/comms/std/src/cuML_std_comms_impl.hpp | 3 +- cpp/comms/std/src/ucp_helper.h | 51 ++++++++++------------- 3 files changed, 32 insertions(+), 42 deletions(-) diff --git a/cpp/comms/std/src/cuML_std_comms_impl.cpp b/cpp/comms/std/src/cuML_std_comms_impl.cpp index aaa1816ff5..0a0fecb15f 100644 --- a/cpp/comms/std/src/cuML_std_comms_impl.cpp +++ b/cpp/comms/std/src/cuML_std_comms_impl.cpp @@ -217,10 +217,7 @@ cumlStdCommunicator_impl::cumlStdCommunicator_impl( cumlStdCommunicator_impl::cumlStdCommunicator_impl(ncclComm_t comm, int size, int rank, bool verbose) - : _nccl_comm(comm), - _size(size), - _rank(rank), - _verbose(verbose) { + : _nccl_comm(comm), _size(size), _rank(rank), _verbose(verbose) { initialize(); } @@ -280,7 +277,8 @@ void cumlStdCommunicator_impl::get_request_id(request_t *req) const { void cumlStdCommunicator_impl::isend(const void *buf, int size, int dest, int tag, 
request_t *request) const { ASSERT(UCX_ENABLED, "cuML Comms not built with UCX support"); - ASSERT(p2p_enabled, "cuML Comms instance was not initialized for point-to-point"); + ASSERT(p2p_enabled, + "cuML Comms instance was not initialized for point-to-point"); #ifdef WITH_UCX ASSERT(_ucp_worker != nullptr, @@ -292,7 +290,7 @@ void cumlStdCommunicator_impl::isend(const void *buf, int size, int dest, ucp_request *ucp_req = (ucp_request *)malloc(sizeof(ucp_request)); this->_ucp_handler.ucp_isend(ucp_req, ep_ptr, buf, size, tag, - default_tag_mask, getRank(), _verbose); + default_tag_mask, getRank(), _verbose); CUML_LOG_DEBUG( "%d: Created send request [id=%llu], ptr=%llu, to=%llu, ep=%llu", getRank(), @@ -306,7 +304,8 @@ void cumlStdCommunicator_impl::isend(const void *buf, int size, int dest, void cumlStdCommunicator_impl::irecv(void *buf, int size, int source, int tag, request_t *request) const { ASSERT(UCX_ENABLED, "cuML Comms not built with UCX support"); - ASSERT(p2p_enabled, "cuML Comms instance was not initialized for point-to-point"); + ASSERT(p2p_enabled, + "cuML Comms instance was not initialized for point-to-point"); #ifdef WITH_UCX ASSERT(_ucp_worker != nullptr, @@ -323,8 +322,8 @@ void cumlStdCommunicator_impl::irecv(void *buf, int size, int source, int tag, } ucp_request *ucp_req = (ucp_request *)malloc(sizeof(ucp_request)); - _ucp_handler.ucp_irecv(ucp_req, _ucp_worker, ep_ptr, buf, - size, tag, tag_mask, source, _verbose); + _ucp_handler.ucp_irecv(ucp_req, _ucp_worker, ep_ptr, buf, size, tag, tag_mask, + source, _verbose); CUML_LOG_DEBUG( "%d: Created receive request [id=%llu], ptr=%llu, from=%llu, ep=%llu", @@ -338,7 +337,8 @@ void cumlStdCommunicator_impl::irecv(void *buf, int size, int source, int tag, void cumlStdCommunicator_impl::waitall(int count, request_t array_of_requests[]) const { ASSERT(UCX_ENABLED, "cuML Comms not built with UCX support"); - ASSERT(p2p_enabled, "cuML Comms instance was not initialized for point-to-point"); + 
ASSERT(p2p_enabled, + "cuML Comms instance was not initialized for point-to-point"); #ifdef WITH_UCX ASSERT(_ucp_worker != nullptr, diff --git a/cpp/comms/std/src/cuML_std_comms_impl.hpp b/cpp/comms/std/src/cuML_std_comms_impl.hpp index 3b894fac72..c6ae513b61 100644 --- a/cpp/comms/std/src/cuML_std_comms_impl.hpp +++ b/cpp/comms/std/src/cuML_std_comms_impl.hpp @@ -26,11 +26,10 @@ #ifdef WITH_UCX #include -#include "ucp_helper.h" #include +#include "ucp_helper.h" #endif - namespace ML { /** diff --git a/cpp/comms/std/src/ucp_helper.h b/cpp/comms/std/src/ucp_helper.h index c285e83ebc..449bc99cb3 100644 --- a/cpp/comms/std/src/ucp_helper.h +++ b/cpp/comms/std/src/ucp_helper.h @@ -24,7 +24,6 @@ #pragma once - /** * Standard UCX request object that will be passed * around asynchronously. This object is really @@ -43,14 +42,13 @@ struct ucx_context { * other fields for trace logging and cleanup. */ class ucp_request { - public: - struct ucx_context* req; - bool needs_release = true; - int other_rank = -1; - bool is_send_request = false; + public: + struct ucx_context *req; + bool needs_release = true; + int other_rank = -1; + bool is_send_request = false; }; - // by default, match the whole tag static const ucp_tag_t default_tag_mask = -1; @@ -74,18 +72,17 @@ static void send_callback(void *request, ucs_status_t status) { * @brief Asynchronous recv callback sets request to completed */ static void recv_callback(void *request, ucs_status_t status, - ucp_tag_recv_info_t *info) { + ucp_tag_recv_info_t *info) { struct ucx_context *context = (struct ucx_context *)request; context->completed = 1; } - /** * Helper class for managing `dlopen` state and * interacting with ucp. 
*/ class comms_ucp_handler { -public: + public: comms_ucp_handler() { load_ucp_handle(); load_send_func(); @@ -95,11 +92,9 @@ class comms_ucp_handler { load_worker_progress_func(); } - ~comms_ucp_handler() { - dlclose(ucp_handle); - } + ~comms_ucp_handler() { dlclose(ucp_handle); } -private: + private: void *ucp_handle; ucs_status_ptr_t (*send_func)(ucp_ep_h, const void *, size_t, ucp_datatype_t, @@ -111,8 +106,7 @@ class comms_ucp_handler { void (*req_free_func)(void *); int (*worker_progress_func)(ucp_worker_h); void load_ucp_handle() { - ucp_handle = - dlopen("libucp.so", RTLD_LAZY | RTLD_NOLOAD | RTLD_NODELETE); + ucp_handle = dlopen("libucp.so", RTLD_LAZY | RTLD_NOLOAD | RTLD_NODELETE); if (!ucp_handle) { ucp_handle = dlopen("libucp.so", RTLD_LAZY | RTLD_NODELETE); ASSERT(ucp_handle, "Cannot open UCX library: %s\n", dlerror()); @@ -139,8 +133,8 @@ class comms_ucp_handler { } void load_print_info_func() { - print_info_func = (void (*)(ucp_ep_h, FILE *))dlsym( - ucp_handle, "ucp_ep_print_info"); + print_info_func = + (void (*)(ucp_ep_h, FILE *))dlsym(ucp_handle, "ucp_ep_print_info"); assert_dlerror(); } @@ -162,13 +156,11 @@ class comms_ucp_handler { return ((uint32_t)tag << 31) | (uint32_t)rank; } - -public: + public: int ucp_progress(ucp_worker_h worker) const { return (*(worker_progress_func))(worker); } - /** * @brief Frees any memory underlying the given ucp request object */ @@ -183,9 +175,8 @@ class comms_ucp_handler { /** * @brief Asynchronously send data to the given endpoint using the given tag */ - void ucp_isend(ucp_request *req, ucp_ep_h ep_ptr, - const void *buf, int size, int tag, ucp_tag_t tag_mask, int rank, - bool verbose) const { + void ucp_isend(ucp_request *req, ucp_ep_h ep_ptr, const void *buf, int size, + int tag, ucp_tag_t tag_mask, int rank, bool verbose) const { ucp_tag_t ucp_tag = build_message_tag(rank, tag); CUML_LOG_DEBUG("Sending tag: %ld", ucp_tag); @@ -220,16 +211,16 @@ class comms_ucp_handler { /** * @brief Asynchronously 
receive data from given endpoint with the given tag. */ - void ucp_irecv(ucp_request *req, - ucp_worker_h worker, ucp_ep_h ep_ptr, void *buf, - int size, int tag, ucp_tag_t tag_mask, - int sender_rank, bool verbose) const { + void ucp_irecv(ucp_request *req, ucp_worker_h worker, ucp_ep_h ep_ptr, + void *buf, int size, int tag, ucp_tag_t tag_mask, + int sender_rank, bool verbose) const { ucp_tag_t ucp_tag = build_message_tag(sender_rank, tag); CUML_LOG_DEBUG("%d: Receiving tag: %ld", ucp_tag); - ucs_status_ptr_t recv_result = (*(recv_func))( - worker, buf, size, ucp_dt_make_contig(1), ucp_tag, tag_mask, recv_callback); + ucs_status_ptr_t recv_result = + (*(recv_func))(worker, buf, size, ucp_dt_make_contig(1), ucp_tag, + tag_mask, recv_callback); struct ucx_context *ucp_req = (struct ucx_context *)recv_result; From 5257c86d560c9c5dce6a6bb718c3e2369fb18791 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Tue, 14 Apr 2020 12:31:34 -0400 Subject: [PATCH 181/330] adding sync to knn cython, removing syncs from fit() functions --- python/cuml/neighbors/kneighbors_classifier.pyx | 2 -- python/cuml/neighbors/kneighbors_regressor.pyx | 2 -- python/cuml/neighbors/nearest_neighbors.pyx | 2 ++ 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/python/cuml/neighbors/kneighbors_classifier.pyx b/python/cuml/neighbors/kneighbors_classifier.pyx index 7b25cd2829..e3a99f7a9f 100644 --- a/python/cuml/neighbors/kneighbors_classifier.pyx +++ b/python/cuml/neighbors/kneighbors_classifier.pyx @@ -195,8 +195,6 @@ class KNeighborsClassifier(NearestNeighbors): if convert_dtype else None)) - self.handle.sync() - def predict(self, X, convert_dtype=True): """ Use the trained k-nearest neighbors classifier to diff --git a/python/cuml/neighbors/kneighbors_regressor.pyx b/python/cuml/neighbors/kneighbors_regressor.pyx index fb57e0e9e2..4508898ea1 100644 --- a/python/cuml/neighbors/kneighbors_regressor.pyx +++ b/python/cuml/neighbors/kneighbors_regressor.pyx @@ -202,8 +202,6 @@ class 
KNeighborsRegressor(NearestNeighbors): if convert_dtype else None)) - self.handle.sync() - def predict(self, X, convert_dtype=True): """ Use the trained k-nearest neighbors regression model to diff --git a/python/cuml/neighbors/nearest_neighbors.pyx b/python/cuml/neighbors/nearest_neighbors.pyx index 3e903c745b..6121dfbe23 100644 --- a/python/cuml/neighbors/nearest_neighbors.pyx +++ b/python/cuml/neighbors/nearest_neighbors.pyx @@ -329,6 +329,8 @@ class NearestNeighbors(Base): False ) + self.handle.sync() + I_ndarr = I_ndarr.reshape((N, n_neighbors)) D_ndarr = D_ndarr.reshape((N, n_neighbors)) From 132bab7d12cfaff001e7c28cf52266a1f49f2159 Mon Sep 17 00:00:00 2001 From: divyegala Date: Tue, 14 Apr 2020 14:26:38 -0500 Subject: [PATCH 182/330] changelog issue --- CHANGELOG.md | 1 - 1 file changed, 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cb5fdac0f5..a108342c02 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,7 +22,6 @@ - PR #2031: Encapsulating UCX-py interactions in singleton - PR #2029: Add C++ ARIMA log-likelihood benchmark - PR #1981: Using CumlArray in kNN and DistributedDataHandler in dask kNN -- PR #2016: Add capability to setup.py and build.sh to fully clean all cython build files and artifacts - PR #2047: Make internal streams non-blocking w.r.t. 
NULL stream - PR #2058: Use CumlArray in Random Projection From 899c8149efbf51baa70810ff4916ce9c4d34c637 Mon Sep 17 00:00:00 2001 From: divyegala Date: Tue, 14 Apr 2020 23:07:59 -0500 Subject: [PATCH 183/330] adding dtype --- python/cuml/dask/datasets/regression.py | 29 +++++++++++++++++-------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/python/cuml/dask/datasets/regression.py b/python/cuml/dask/datasets/regression.py index 495697a96c..1612f2ab6f 100644 --- a/python/cuml/dask/datasets/regression.py +++ b/python/cuml/dask/datasets/regression.py @@ -42,7 +42,7 @@ def create_rs_generator(random_state): def make_low_rank_matrix(n_samples=100, n_features=100, effective_rank=10, tail_strength=0.5, random_state=None, n_parts=1, - n_samples_per_part=None): + n_samples_per_part=None, dtype='float32'): """ Generate a mostly low rank matrix with bell-shaped singular values Parameters @@ -63,6 +63,8 @@ def make_low_rank_matrix(n_samples=100, n_features=100, effective_rank=10, for reproducible output across multiple function calls. n_parts : int, optional (default=1) The number of parts of work. 
+ dtype: str, optional (default='float32') + dtype of generated data Returns ------- @@ -87,12 +89,14 @@ def generate_chunks_for_qr(total_size, min_size, n_parts): # Random (ortho normal) vectors m1 = rs.standard_normal((n_samples, n), chunks=(generate_chunks_for_qr(n_samples, - n, n_parts), -1)) + n, n_parts), -1), + dtype=dtype) u, _ = da.linalg.qr(m1) m2 = rs.standard_normal((n, n_features), chunks=(-1, generate_chunks_for_qr(n_features, - n, n_parts))) + n, n_parts)), + dtype=dtype) v, _ = da.linalg.qr(m2) # For final multiplication @@ -119,7 +123,8 @@ def generate_chunks_for_qr(total_size, min_size, n_parts): def make_regression(n_samples=100, n_features=100, n_informative=10, n_targets=1, bias=0.0, effective_rank=None, tail_strength=0.5, noise=0.0, shuffle=False, coef=False, - random_state=None, n_parts=1, n_samples_per_part=None): + random_state=None, n_parts=1, n_samples_per_part=None, + order='F', dtype='float32'): """Generate a random regression problem. The input set can either be well conditioned (by default) or have a low rank-fat tail singular profile. @@ -167,6 +172,10 @@ def make_regression(n_samples=100, n_features=100, n_informative=10, for reproducible output across multiple function calls. n_parts : int, optional (default=1) The number of parts of work. 
+ order : str, optional (default='F') + Row-major or Col-major + dtype: str, optional (default='float32') + dtype of generated data Returns ------- @@ -190,7 +199,8 @@ def make_regression(n_samples=100, n_features=100, n_informative=10, X = rs.standard_normal((n_samples, n_features), chunks=(n_samples_per_part, (n_informative, n_features - - n_informative))) + n_informative)), + dtype=dtype) else: # Randomly generate a low rank, fat tail input set @@ -199,7 +209,7 @@ def make_regression(n_samples=100, n_features=100, n_informative=10, effective_rank=effective_rank, tail_strength=tail_strength, random_state=rs, - n_parts=n_parts) + n_parts=n_parts, dtype=dtype) X = X.rechunk({0: n_samples_per_part, 1: (n_informative, n_features-n_informative)}) @@ -208,7 +218,8 @@ def make_regression(n_samples=100, n_features=100, n_informative=10, # by a sparsifying regularizers such as L1 or elastic net) ground_truth = 100.0 * rs.standard_normal((n_informative, n_targets), - chunks=(n_samples_per_part, -1)) + chunks=(n_samples_per_part, -1), + dtype=dtype) y = da.dot(X[:, :n_informative], ground_truth) + bias X = X.rechunk((None, -1)) @@ -216,13 +227,13 @@ def make_regression(n_samples=100, n_features=100, n_informative=10, if n_informative != n_features: zeroes = 0.0 * rs.standard_normal((n_features - n_informative, - n_targets)) + n_targets), dtype=dtype) ground_truth = da.concatenate([ground_truth, zeroes], axis=0) ground_truth = ground_truth.rechunk(-1) # Add noise if noise > 0.0: - y += rs.normal(scale=noise, size=y.shape) + y += rs.normal(scale=noise, size=y.shape, dtype=dtype) # Randomly permute samples and features if shuffle: From 30f10aa427c9b2a58ce6d39838e0cd0c68c2a2fb Mon Sep 17 00:00:00 2001 From: Levs Dolgovs Date: Tue, 14 Apr 2020 22:47:36 -0700 Subject: [PATCH 184/330] addressed PR review comments --- cpp/src/fil/fil.cu | 10 +++---- cpp/src/fil/infer.cu | 36 ++++++++++++++--------- cpp/test/sg/fil_test.cu | 63 ++++++++++++++++++++++++++++++++--------- 3 files 
changed, 76 insertions(+), 33 deletions(-) diff --git a/cpp/src/fil/fil.cu b/cpp/src/fil/fil.cu index fdb9d993bb..3c993ae21f 100644 --- a/cpp/src/fil/fil.cu +++ b/cpp/src/fil/fil.cu @@ -78,7 +78,7 @@ __global__ void transform_k(float* preds, size_t n, output_t output, float global_bias, bool complement_proba) { size_t i = threadIdx.x + size_t(blockIdx.x) * blockDim.x; if (i >= n) return; - if (complement_proba && (i % 2) != 0) return; + if (complement_proba && i % 2 != 0) return; float result = preds[i]; if ((output & output_t::AVG) != 0) result *= inv_num_trees; @@ -314,7 +314,7 @@ void check_params(const forest_params_t* params, bool dense) { break; case leaf_value_t::INT_CLASS_LABEL: ASSERT(params->num_classes >= 2, - "num_classes is not ignored for " + "num_classes >= 2 is required for " "leaf_payload_type == INT_CLASS_LABEL"); break; default: @@ -527,7 +527,6 @@ int tree2fil_sparse(std::vector* pnodes, const tl::Tree& tree, size_t tl_leaf_vector_size(const tl::Model& model) { const tl::Tree& tree = model.trees[0]; - int _ = max_depth(tree); // just checking for cycles int node_key; for (node_key = tree_root(tree); !tl_node_at(tree, node_key).is_leaf(); node_key = tl_node_at(tree, node_key).cright()) @@ -545,6 +544,9 @@ void tl2fil_common(forest_params_t* params, const tl::Model& model, params->algo = tl_params->algo; params->threshold = tl_params->threshold; + // fill in forest-dependent params + params->depth = max_depth(model); // also checks for cycles + // assuming either all leaves use the .leaf_vector() or all leaves use .leaf_value() size_t leaf_vec_size = tl_leaf_vector_size(model); if (leaf_vec_size > 0) { @@ -557,7 +559,6 @@ void tl2fil_common(forest_params_t* params, const tl::Model& model, params->num_classes = 0; // ignored } - // fill in forest-dependent params params->num_cols = model.num_feature; const tl::ModelParam& param = model.param; ASSERT(param.sigmoid_alpha == 1.0f, "sigmoid_alpha not supported"); @@ -577,7 +578,6 @@ void 
tl2fil_common(forest_params_t* params, const tl::Model& model, param.pred_transform.c_str()); } params->num_trees = model.trees.size(); - params->depth = max_depth(model); } // uses treelite model with additional tl_params to initialize FIL params diff --git a/cpp/src/fil/infer.cu b/cpp/src/fil/infer.cu index c915ff6df3..60ba185976 100644 --- a/cpp/src/fil/infer.cu +++ b/cpp/src/fil/infer.cu @@ -53,7 +53,6 @@ __device__ __forceinline__ vec infer_one_tree( do { #pragma unroll for (int j = 0; j < NITEMS; ++j) { - //if ((mask & (1 << j)) == 0) continue; auto n = tree[curr[j]]; if (n.is_leaf()) { mask &= ~(1 << j); @@ -94,13 +93,14 @@ __device__ __forceinline__ vec<1, output_type> infer_one_tree(tree_type tree, // the device template should achieve the best performance, using up-to-date // CUB defaults -#define BlockReduceDevice typename cub::BlockReduce, FIL_TPB> +template +using BlockReduce = typename cub::BlockReduce, FIL_TPB>; /** The shared memory requirements for finalization stage may differ based on the set of PTX architectures the kernels were compiled for, as well as the CUDA compute capability of the device chosen for computation. -TODO: run a test kernel during forest init to determine the compute capability +TODO (levsnv): run a test kernel during forest init to determine the compute capability chosen for the inference, for an accurate sizeof(BlockReduce::TempStorage), which is used in determining max NITEMS or max input data columns. @@ -111,11 +111,9 @@ See https://rapids.ai/start.html as well as cmake defaults. */ // values below are defaults as of this change. 
template -struct BlockReduceHost { - typedef typename cub::BlockReduce, FIL_TPB, - cub::BLOCK_REDUCE_WARP_REDUCTIONS, 1, 1, - 600>::TempStorage TempStorage; -}; +using BlockReduceHost = + typename cub::BlockReduce, FIL_TPB, + cub::BLOCK_REDUCE_WARP_REDUCTIONS, 1, 1, 600>; template // = FLOAT_SCALAR @@ -123,14 +121,24 @@ struct tree_aggregator_t { vec acc; void* tmp_storage; - static size_t smem_finalize_footprint(int) { + /** shared memory footprint of the accumulator during + the finalization of forest inference kernel, when infer_k output + value is computed. + num_classes is used for other template parameters */ + static size_t smem_finalize_footprint(int num_classes) { return sizeof(typename BlockReduceHost::TempStorage); } - static size_t smem_accumulate_footprint(int) { return 0; } + /** shared memory footprint of the accumulator during + the accumulation of forest inference, when individual trees + are inferred and partial aggregates are accumulated. + num_classes is used for other template parameters */ + static size_t smem_accumulate_footprint(int num_classes) { return 0; } - __device__ __forceinline__ tree_aggregator_t(int, void* shared_workspace, - size_t) + /** + num_classes is used for other template parameters */ + __device__ __forceinline__ tree_aggregator_t(int num_classes, + void* shared_workspace, size_t) : tmp_storage(shared_workspace) {} __device__ __forceinline__ void accumulate( @@ -141,8 +149,8 @@ struct tree_aggregator_t { __device__ __forceinline__ void finalize(float* out, int num_rows, int output_stride) { __syncthreads(); - acc = - BlockReduceDevice(*(BlockReduceDevice::TempStorage*)tmp_storage).Sum(acc); + typedef typename BlockReduce::TempStorage TempStorage; + acc = BlockReduce(*(TempStorage*)tmp_storage).Sum(acc); if (threadIdx.x == 0) { for (int i = 0; i < NITEMS; ++i) { int row = blockIdx.x * NITEMS + i; diff --git a/cpp/test/sg/fil_test.cu b/cpp/test/sg/fil_test.cu index 5a076230b3..a1a412740c 100644 --- a/cpp/test/sg/fil_test.cu 
+++ b/cpp/test/sg/fil_test.cu @@ -127,7 +127,8 @@ class BaseFilTest : public testing::TestWithParam { size_t num_nodes = forest_num_nodes(); // helper data - float* weights_d = nullptr; + /// weights, used as float* or int* + int* weights_d = nullptr; float* thresholds_d = nullptr; int* fids_d = nullptr; bool* def_lefts_d = nullptr; @@ -137,6 +138,7 @@ class BaseFilTest : public testing::TestWithParam { // allocate GPU data allocate(weights_d, num_nodes); + // sizeof(float) == sizeof(int) allocate(thresholds_d, num_nodes); allocate(fids_d, num_nodes); allocate(def_lefts_d, num_nodes); @@ -145,11 +147,10 @@ class BaseFilTest : public testing::TestWithParam { // generate on-GPU random data Random::Rng r(ps.seed); if (ps.leaf_payload_type == fil::leaf_value_t::FLOAT_SCALAR) { - r.uniform(weights_d, num_nodes, -1.0f, 1.0f, stream); + r.uniform((float*)weights_d, num_nodes, -1.0f, 1.0f, stream); } else { - r.uniform(weights_d, num_nodes, 0.0f, - // [0..num_classes) - std::nextafterf(ps.num_classes, 0.0f), stream); + // [0..num_classes) + r.uniformInt((int*)weights_d, num_nodes, 0, ps.num_classes, stream); } r.uniform(thresholds_d, num_nodes, -1.0f, 1.0f, stream); r.uniformInt(fids_d, num_nodes, 0, ps.num_cols, stream); @@ -157,12 +158,12 @@ class BaseFilTest : public testing::TestWithParam { r.bernoulli(is_leafs_d, num_nodes, 1.0f - ps.leaf_prob, stream); // copy data to host - std::vector weights_h(num_nodes), thresholds_h(num_nodes); - std::vector fids_h(num_nodes); + std::vector thresholds_h(num_nodes); + std::vector weights_h(num_nodes), fids_h(num_nodes); def_lefts_h = new bool[num_nodes]; is_leafs_h = new bool[num_nodes]; - updateHost(weights_h.data(), weights_d, num_nodes, stream); + updateHost(weights_h.data(), (int*)weights_d, num_nodes, stream); updateHost(thresholds_h.data(), thresholds_d, num_nodes, stream); updateHost(fids_h.data(), fids_d, num_nodes, stream); updateHost(def_lefts_h, def_lefts_d, num_nodes, stream); @@ -185,10 +186,12 @@ class BaseFilTest : 
public testing::TestWithParam { fil::val_t w; switch (ps.leaf_payload_type) { case fil::leaf_value_t::INT_CLASS_LABEL: - w.idx = int(weights_h[i]); + w.idx = weights_h[i]; break; case fil::leaf_value_t::FLOAT_SCALAR: - w.f = weights_h[i]; + // not relying on fil::val_t internals + // merely that we copied floats into weights_h earlier + std::memcpy(&w.f, &weights_h[i], sizeof w.f); } fil::dense_node_init(&nodes[i], w, thresholds_h[i], fids_h[i], def_lefts_h[i], is_leafs_h[i]); @@ -271,7 +274,7 @@ class BaseFilTest : public testing::TestWithParam { case fil::leaf_value_t::INT_CLASS_LABEL: std::vector class_votes(ps.num_classes); for (int r = 0; r < ps.num_rows; ++r) { - for (auto& v : class_votes) v = 0; + std::fill(class_votes.begin(), class_votes.end(), 0); for (int j = 0; j < ps.num_trees; ++j) { int class_label = infer_one_tree(&nodes[j * num_nodes], &data_h[r * ps.num_cols]) @@ -482,7 +485,7 @@ class TreeliteFilTest : public BaseFilTest { break; case fil::leaf_value_t::INT_CLASS_LABEL: std::vector vec(ps.num_classes); - for (int i = 0; i < ps.num_classes; ++i) vec[i] = i == output.idx; + for (int i = 0; i < ps.num_classes; ++i) vec[i] = i == output.idx ? 
1.0f : 0.0f; TL_CPP_CHECK(builder->SetLeafVectorNode(key, vec)); } } else { @@ -628,10 +631,18 @@ std::vector predict_dense_inputs = { fil::output_t(fil::output_t::AVG | fil::output_t::CLASS), 0, 0, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator(0), fil::leaf_value_t::FLOAT_SCALAR, 0}, + {20000, 50, 0.05, 8, 50, 0.05, + fil::output_t(fil::output_t::AVG | fil::output_t::CLASS), 0, 0, + fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator(0), + fil::leaf_value_t::INT_CLASS_LABEL, 2}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::CLASS), 0, 0, fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator(0), fil::leaf_value_t::FLOAT_SCALAR, 0}, + {20000, 50, 0.05, 8, 50, 0.05, + fil::output_t(fil::output_t::AVG | fil::output_t::CLASS), 0, 0, + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator(0), + fil::leaf_value_t::INT_CLASS_LABEL, 5}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::CLASS), 0, 0, fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator(0), @@ -672,6 +683,10 @@ std::vector predict_sparse_inputs = { fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator(0), fil::leaf_value_t::FLOAT_SCALAR, 0}, + {20000, 50, 0.05, 8, 50, 0.05, + fil::output_t(fil::output_t::AVG | fil::output_t::CLASS), 0, 0.5, + fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator(0), + fil::leaf_value_t::INT_CLASS_LABEL, 2}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::CLASS), 0, 0.5, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator(0), @@ -683,6 +698,10 @@ std::vector predict_sparse_inputs = { fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0.5, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator(0), fil::leaf_value_t::FLOAT_SCALAR, 0}, + {20000, 50, 0.05, 8, 50, 0.05, + fil::output_t(fil::output_t::AVG | fil::output_t::CLASS), 1.0, 0.5, + fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator(0), + 
fil::leaf_value_t::INT_CLASS_LABEL, 10}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::CLASS), 1.0, 0.5, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator(0), @@ -718,6 +737,10 @@ std::vector import_dense_inputs = { fil::output_t(fil::output_t::AVG | fil::output_t::CLASS), 0, 0, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::FLOAT_SCALAR, 0}, + {20000, 50, 0.05, 8, 50, 0.05, + fil::output_t(fil::output_t::AVG | fil::output_t::CLASS), 0, 0, + fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kLT, + fil::leaf_value_t::INT_CLASS_LABEL, 2}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0, fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kLE, fil::leaf_value_t::FLOAT_SCALAR, 0}, @@ -730,7 +753,7 @@ std::vector import_dense_inputs = { {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::SIGMOID | fil::output_t::AVG), 0, 0, fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kGT, - fil::leaf_value_t::INT_CLASS_LABEL, 6}, + fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::SIGMOID | fil::output_t::CLASS), 0, 0, fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kGE, @@ -738,6 +761,10 @@ std::vector import_dense_inputs = { {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::FLOAT_SCALAR, 0}, + {20000, 50, 0.05, 8, 50, 0.05, + fil::output_t(fil::output_t::AVG | fil::output_t::CLASS), 0, 0, + fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kLE, + fil::leaf_value_t::INT_CLASS_LABEL, 5}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::CLASS), 0, 0, fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kLE, @@ -811,7 +838,7 @@ std::vector import_dense_inputs = { {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::SIGMOID | fil::output_t::AVG), 0, 0.5, fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLE, - fil::leaf_value_t::INT_CLASS_LABEL, 4}, + 
fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0.5, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kGT, fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, @@ -850,6 +877,10 @@ std::vector import_sparse_inputs = { fil::output_t(fil::output_t::AVG | fil::output_t::CLASS), 0, 0, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::FLOAT_SCALAR, 0}, + {20000, 50, 0.05, 8, 50, 0.05, + fil::output_t(fil::output_t::AVG | fil::output_t::CLASS), 0, 0, + fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kLT, + fil::leaf_value_t::INT_CLASS_LABEL, 2}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0.5, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0.5, fil::algo_t::NAIVE, @@ -863,6 +894,10 @@ std::vector import_sparse_inputs = { fil::output_t(fil::output_t::AVG | fil::output_t::CLASS), 1.0, 0.5, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kGE, fil::leaf_value_t::FLOAT_SCALAR, 0}, + {20000, 50, 0.05, 8, 50, 0.05, + fil::output_t(fil::output_t::AVG | fil::output_t::CLASS), 1.0, 0.5, + fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kGE, + fil::leaf_value_t::INT_CLASS_LABEL, 10}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::INT_CLASS_LABEL, 4}, From 7b7728616c78e696ac3a2b1b2143e592e7b13e9f Mon Sep 17 00:00:00 2001 From: divyegala Date: Wed, 15 Apr 2020 01:08:13 -0500 Subject: [PATCH 185/330] adding F order --- python/cuml/dask/datasets/regression.py | 132 ++++++++++++++++++++---- 1 file changed, 114 insertions(+), 18 deletions(-) diff --git a/python/cuml/dask/datasets/regression.py b/python/cuml/dask/datasets/regression.py index 1612f2ab6f..0eed1695a7 100644 --- a/python/cuml/dask/datasets/regression.py +++ b/python/cuml/dask/datasets/regression.py @@ -15,9 +15,12 @@ # import dask.array as da +import dask.delayed +from 
dask.distributed import default_client import numpy as np import cupy as cp from cuml.utils import rmm_cupy_ary +from cuml.dask.common.part_utils import _extract_partitions def create_rs_generator(random_state): @@ -40,9 +43,82 @@ def create_rs_generator(random_state): return rs -def make_low_rank_matrix(n_samples=100, n_features=100, effective_rank=10, - tail_strength=0.5, random_state=None, n_parts=1, - n_samples_per_part=None, dtype='float32'): +def _f_order_standard_normal(nrows, ncols, dtype, seed): + local_rs = cp.random.RandomState(seed=seed) + x = local_rs.standard_normal(nrows * ncols, dtype=dtype) + x = x.reshape((nrows, ncols), order='F') + return x + + +def f_order_standard_normal(client, rs, nrows, ncols, chunksizes, dtype): + chunk_seeds = rs.permutation(len(chunksizes)) + chunks = [client.submit(_f_order_standard_normal, chunksize, ncols, dtype, + chunk_seeds[idx]) + for idx, chunksize in enumerate(chunksizes)] + + chunks_dela = [da.from_delayed(dask.delayed(chunk), + shape=(chunksizes[idx], ncols), + meta=cp.zeros((1)), dtype=dtype) + for idx, chunk in enumerate(chunks)] + return da.concatenate(chunks_dela, axis=0) + + +def get_X(t): + return t[0] + + +def get_labels(t): + return t[1] + + +def _f_order_shuffle(X, y, n_samples_per_part, seed, features_indices): + local_rs = cp.random.RandomState(seed=seed) + samples_indices = local_rs.permutation(n_samples_per_part) + + X[...] = X[samples_indices, :] + X[...] = X[:, features_indices] + + y[...] 
= y[samples_indices, :] + return X, y + + +def f_order_shuffle(client, rs, X, y, n_parts, n_samples_per_part, + n_features, features_indices, dtype): + X_parts = client.sync(_extract_partitions, X) + y_parts = client.sync(_extract_partitions, y) + + chunk_seeds = rs.permutation(n_parts) + + shuffled = [client.submit(_f_order_shuffle, X_part, y_parts[idx][1], + n_samples_per_part, + chunk_seeds[idx], features_indices, + workers=[w]) + for idx, (w, X_part) in enumerate(X_parts)] + + X_shuffled = [client.submit(get_X, f, pure=False) + for idx, f in enumerate(shuffled)] + Y_shuffled = [client.submit(get_labels, f, pure=False) + for idx, f in enumerate(shuffled)] + + X_dela = [da.from_delayed(dask.delayed(Xs), + shape=(n_samples_per_part, n_features), + meta=cp.zeros((1)), + dtype=dtype) + for Xs in X_shuffled] + + y_dela = [da.from_delayed(dask.delayed(Xs), + shape=(n_samples_per_part,), + meta=cp.zeros((1)), + dtype=dtype) + for Xs in X_shuffled] + + return da.concatenate(X_dela, axis=0), da.concatenate(y_dela, axis=0) + + +def make_low_rank_matrix(client=None, n_samples=100, n_features=100, + effective_rank=10, tail_strength=0.5, + random_state=None, n_parts=1, + n_samples_per_part=None, dtype='float32', order='F'): """ Generate a mostly low rank matrix with bell-shaped singular values Parameters @@ -71,6 +147,9 @@ def make_low_rank_matrix(n_samples=100, n_features=100, effective_rank=10, X : Dask-CuPy array of shape [n_samples, n_features] The matrix. 
""" + + client = default_client() if client is None else client + rs = create_rs_generator(random_state) n = min(n_samples, n_features) @@ -89,9 +168,8 @@ def generate_chunks_for_qr(total_size, min_size, n_parts): # Random (ortho normal) vectors m1 = rs.standard_normal((n_samples, n), chunks=(generate_chunks_for_qr(n_samples, - n, n_parts), -1), + n, n_parts), -1), dtype=dtype) - u, _ = da.linalg.qr(m1) m2 = rs.standard_normal((n, n_features), chunks=(-1, generate_chunks_for_qr(n_features, @@ -124,7 +202,7 @@ def make_regression(n_samples=100, n_features=100, n_informative=10, n_targets=1, bias=0.0, effective_rank=None, tail_strength=0.5, noise=0.0, shuffle=False, coef=False, random_state=None, n_parts=1, n_samples_per_part=None, - order='F', dtype='float32'): + order='F', dtype='float32', client=None): """Generate a random regression problem. The input set can either be well conditioned (by default) or have a low rank-fat tail singular profile. @@ -188,6 +266,9 @@ def make_regression(n_samples=100, n_features=100, n_informative=10, The coefficient of the underlying linear model. It is returned only if coef is True. 
""" + + client = default_client() if client is None else client + n_informative = min(n_features, n_informative) rs = create_rs_generator(random_state) @@ -196,20 +277,27 @@ def make_regression(n_samples=100, n_features=100, n_informative=10, if effective_rank is None: # Randomly generate a well conditioned input set - X = rs.standard_normal((n_samples, n_features), - chunks=(n_samples_per_part, (n_informative, - n_features - - n_informative)), - dtype=dtype) + if order == 'F': + X = f_order_standard_normal(client, rs, n_samples, n_features, + [n_samples_per_part] * n_parts, dtype) + elif order == 'C': + X = rs.standard_normal((n_samples, n_features), + chunks=(n_samples_per_part, (n_informative, + n_features - + n_informative)), + dtype=dtype) else: # Randomly generate a low rank, fat tail input set - X = make_low_rank_matrix(n_samples=n_samples, + X = make_low_rank_matrix(client=client, + n_samples=n_samples, n_features=n_features, effective_rank=effective_rank, tail_strength=tail_strength, random_state=rs, - n_parts=n_parts, dtype=dtype) + n_parts=n_parts, + dtype=dtype, + order=order) X = X.rechunk({0: n_samples_per_part, 1: (n_informative, n_features-n_informative)}) @@ -237,12 +325,20 @@ def make_regression(n_samples=100, n_features=100, n_informative=10, # Randomly permute samples and features if shuffle: - samples_indices = np.random.permutation(n_samples) - X = X[samples_indices, :] - y = y[samples_indices, :] - features_indices = np.random.permutation(n_features) - X = X[:, features_indices] + if order == 'F': + X, y = f_order_shuffle(client, rs, X, y, n_parts, + n_samples_per_part, + n_features, features_indices, + dtype) + + elif order == 'C': + samples_indices = np.random.permutation(n_samples) + + X = X[samples_indices, :] + y = y[samples_indices, :] + + X = X[:, features_indices] ground_truth = ground_truth[features_indices, :] y = da.squeeze(y) From a4ba24a08f2da30eff23e0cef21b1f73eb6f175d Mon Sep 17 00:00:00 2001 From: divyegala Date: Wed, 15 Apr 
2020 03:31:13 -0500 Subject: [PATCH 186/330] use full low rank introduced --- python/cuml/dask/datasets/regression.py | 72 ++++++++++++++++--------- python/cuml/test/dask/test_datasets.py | 19 +++++-- 2 files changed, 64 insertions(+), 27 deletions(-) diff --git a/python/cuml/dask/datasets/regression.py b/python/cuml/dask/datasets/regression.py index 0eed1695a7..e68331f808 100644 --- a/python/cuml/dask/datasets/regression.py +++ b/python/cuml/dask/datasets/regression.py @@ -16,11 +16,13 @@ import dask.array as da import dask.delayed -from dask.distributed import default_client +from dask.distributed import default_client, get_worker import numpy as np import cupy as cp from cuml.utils import rmm_cupy_ary from cuml.dask.common.part_utils import _extract_partitions +from cuml.datasets.regression import make_regression as sg_make_regression +from cuml.utils import with_cupy_rmm def create_rs_generator(random_state): @@ -83,7 +85,7 @@ def _f_order_shuffle(X, y, n_samples_per_part, seed, features_indices): def f_order_shuffle(client, rs, X, y, n_parts, n_samples_per_part, - n_features, features_indices, dtype): + n_features, features_indices, n_targets, dtype): X_parts = client.sync(_extract_partitions, X) y_parts = client.sync(_extract_partitions, y) @@ -97,7 +99,7 @@ def f_order_shuffle(client, rs, X, y, n_parts, n_samples_per_part, X_shuffled = [client.submit(get_X, f, pure=False) for idx, f in enumerate(shuffled)] - Y_shuffled = [client.submit(get_labels, f, pure=False) + y_shuffled = [client.submit(get_labels, f, pure=False) for idx, f in enumerate(shuffled)] X_dela = [da.from_delayed(dask.delayed(Xs), @@ -106,11 +108,11 @@ def f_order_shuffle(client, rs, X, y, n_parts, n_samples_per_part, dtype=dtype) for Xs in X_shuffled] - y_dela = [da.from_delayed(dask.delayed(Xs), - shape=(n_samples_per_part,), + y_dela = [da.from_delayed(dask.delayed(ys), + shape=(n_samples_per_part, n_targets), meta=cp.zeros((1)), dtype=dtype) - for Xs in X_shuffled] + for ys in y_shuffled] 
return da.concatenate(X_dela, axis=0), da.concatenate(y_dela, axis=0) @@ -170,6 +172,7 @@ def generate_chunks_for_qr(total_size, min_size, n_parts): chunks=(generate_chunks_for_qr(n_samples, n, n_parts), -1), dtype=dtype) + u, _ = da.linalg.qr(m1) m2 = rs.standard_normal((n, n_features), chunks=(-1, generate_chunks_for_qr(n_features, @@ -198,11 +201,13 @@ def generate_chunks_for_qr(total_size, min_size, n_parts): return da.dot(u, v) +@with_cupy_rmm def make_regression(n_samples=100, n_features=100, n_informative=10, n_targets=1, bias=0.0, effective_rank=None, tail_strength=0.5, noise=0.0, shuffle=False, coef=False, random_state=None, n_parts=1, n_samples_per_part=None, - order='F', dtype='float32', client=None): + order='F', dtype='float32', client=None, + use_full_low_rank=True): """Generate a random regression problem. The input set can either be well conditioned (by default) or have a low rank-fat tail singular profile. @@ -254,6 +259,9 @@ def make_regression(n_samples=100, n_features=100, n_informative=10, Row-major or Col-major dtype: str, optional (default='float32') dtype of generated data + use_full_low_rank : boolean (default=True) + Whether to use the entire dataset to generate the low rank matrix. 
+ If False, it uses the first chunk Returns ------- @@ -275,7 +283,7 @@ def make_regression(n_samples=100, n_features=100, n_informative=10, if n_samples_per_part is None: n_samples_per_part = max(1, int(n_samples / n_parts)) - if effective_rank is None: + if (effective_rank is None) or (effective_rank and not use_full_low_rank): # Randomly generate a well conditioned input set if order == 'F': X = f_order_standard_normal(client, rs, n_samples, n_features, @@ -290,29 +298,45 @@ def make_regression(n_samples=100, n_features=100, n_informative=10, else: # Randomly generate a low rank, fat tail input set X = make_low_rank_matrix(client=client, - n_samples=n_samples, - n_features=n_features, - effective_rank=effective_rank, - tail_strength=tail_strength, - random_state=rs, - n_parts=n_parts, - dtype=dtype, - order=order) + n_samples=n_samples, + n_features=n_features, + effective_rank=effective_rank, + tail_strength=tail_strength, + random_state=rs, + n_parts=n_parts, + dtype=dtype, + order=order) X = X.rechunk({0: n_samples_per_part, - 1: (n_informative, n_features-n_informative)}) + 1: (n_informative, n_features-n_informative)}) # Generate a ground truth model with only n_informative features being non # zeros (the other features are not correlated to y and should be ignored # by a sparsifying regularizers such as L1 or elastic net) + if effective_rank and not use_full_low_rank: + _, _, coef_ = sg_make_regression(n_samples=n_samples_per_part, + n_features=n_features, + n_informative=n_informative, + n_targets=n_targets, + bias=bias, + effective_rank=effective_rank, + tail_strength=tail_strength, + noise=noise, + shuffle=shuffle, + coef=True, + random_state=random_state, + dtype='double') + coef_ = cp.array(coef_, dtype=dtype) + ground_truth = da.from_array(coef_, chunks=(n_samples_per_part, -1)) + y = da.dot(X, ground_truth) + bias + else: + ground_truth = 100.0 * rs.standard_normal((n_informative, n_targets), + chunks=(n_samples_per_part, -1), + dtype=dtype) - 
ground_truth = 100.0 * rs.standard_normal((n_informative, n_targets), - chunks=(n_samples_per_part, -1), - dtype=dtype) - - y = da.dot(X[:, :n_informative], ground_truth) + bias + y = da.dot(X[:, :n_informative], ground_truth) + bias X = X.rechunk((None, -1)) - if n_informative != n_features: + if n_informative != n_features and (effective_rank is None or use_full_low_rank): zeroes = 0.0 * rs.standard_normal((n_features - n_informative, n_targets), dtype=dtype) @@ -330,7 +354,7 @@ def make_regression(n_samples=100, n_features=100, n_informative=10, X, y = f_order_shuffle(client, rs, X, y, n_parts, n_samples_per_part, n_features, features_indices, - dtype) + n_targets, dtype) elif order == 'C': samples_indices = np.random.permutation(n_samples) diff --git a/python/cuml/test/dask/test_datasets.py b/python/cuml/test/dask/test_datasets.py index 08170a3f94..7da75b8084 100644 --- a/python/cuml/test/dask/test_datasets.py +++ b/python/cuml/test/dask/test_datasets.py @@ -25,6 +25,7 @@ from cuml.dask.datasets import make_blobs from cuml.test.utils import unit_param, quality_param, stress_param +from cuml.dask.common.part_utils import _extract_partitions @pytest.mark.parametrize('nrows', [unit_param(1e3), quality_param(1e5), @@ -97,11 +98,13 @@ def test_make_blobs(nrows, @pytest.mark.parametrize('random_state', [None, 1234]) @pytest.mark.parametrize('n_parts', [unit_param(1), stress_param(3)]) +@pytest.mark.parametrize('order', ['F', 'C']) +@pytest.mark.parametrize('use_full_low_rank', [True, False]) def test_make_regression(n_samples, n_features, n_informative, n_targets, bias, effective_rank, tail_strength, noise, shuffle, - coef, random_state, n_parts, - cluster): + coef, random_state, n_parts, order, + use_full_low_rank, cluster): c = Client(cluster) try: from cuml.dask.datasets import make_regression @@ -111,7 +114,9 @@ def test_make_regression(n_samples, n_features, n_informative, n_targets=n_targets, bias=bias, effective_rank=effective_rank, noise=noise, 
shuffle=shuffle, coef=coef, - random_state=random_state, n_parts=n_parts) + random_state=random_state, n_parts=n_parts, + use_full_low_rank=use_full_low_rank, + order=order) if coef: out, values, coefs = result @@ -152,5 +157,13 @@ def test_make_regression(n_samples, n_features, n_informative, assert test2, "Unexpectedly incongruent outputs" + X_part = c.sync(_extract_partitions, out) + out_part = X_part[0][1].result() + if order == 'F': + if effective_rank is None or (effective_rank and not use_full_low_rank): + assert out_part.flags['F_CONTIGUOUS'] + elif order == 'C': + assert out_part.flags['C_CONTIGUOUS'] + finally: c.close() From 0e7ce6ab3b9ca74a91d59f6fc065f97a19d3b4a0 Mon Sep 17 00:00:00 2001 From: divyegala Date: Wed, 15 Apr 2020 03:41:04 -0500 Subject: [PATCH 187/330] changelog and style fix --- CHANGELOG.md | 1 + python/cuml/dask/datasets/regression.py | 74 +++++++++++++------------ python/cuml/test/dask/test_datasets.py | 3 +- 3 files changed, 42 insertions(+), 36 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5481f6b395..e6ecabff72 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ ## New Features - PR #1867: C++: add logging interface support in cuML based spdlog - PR #1906: UMAP MNMG +- PR #2083: Added dtype, order, and use_full_low_rank to MNMG `make_regression` ## Improvements - PR #1931: C++: enabled doxygen docs for all of the C++ codebase diff --git a/python/cuml/dask/datasets/regression.py b/python/cuml/dask/datasets/regression.py index e68331f808..34b4bf8c16 100644 --- a/python/cuml/dask/datasets/regression.py +++ b/python/cuml/dask/datasets/regression.py @@ -16,7 +16,7 @@ import dask.array as da import dask.delayed -from dask.distributed import default_client, get_worker +from dask.distributed import default_client import numpy as np import cupy as cp from cuml.utils import rmm_cupy_ary @@ -92,10 +92,10 @@ def f_order_shuffle(client, rs, X, y, n_parts, n_samples_per_part, chunk_seeds = rs.permutation(n_parts) shuffled = 
[client.submit(_f_order_shuffle, X_part, y_parts[idx][1], - n_samples_per_part, - chunk_seeds[idx], features_indices, - workers=[w]) - for idx, (w, X_part) in enumerate(X_parts)] + n_samples_per_part, + chunk_seeds[idx], features_indices, + workers=[w]) + for idx, (w, X_part) in enumerate(X_parts)] X_shuffled = [client.submit(get_X, f, pure=False) for idx, f in enumerate(shuffled)] @@ -107,7 +107,7 @@ def f_order_shuffle(client, rs, X, y, n_parts, n_samples_per_part, meta=cp.zeros((1)), dtype=dtype) for Xs in X_shuffled] - + y_dela = [da.from_delayed(dask.delayed(ys), shape=(n_samples_per_part, n_targets), meta=cp.zeros((1)), @@ -170,7 +170,7 @@ def generate_chunks_for_qr(total_size, min_size, n_parts): # Random (ortho normal) vectors m1 = rs.standard_normal((n_samples, n), chunks=(generate_chunks_for_qr(n_samples, - n, n_parts), -1), + n, n_parts), -1), dtype=dtype) u, _ = da.linalg.qr(m1) @@ -290,53 +290,57 @@ def make_regression(n_samples=100, n_features=100, n_informative=10, [n_samples_per_part] * n_parts, dtype) elif order == 'C': X = rs.standard_normal((n_samples, n_features), - chunks=(n_samples_per_part, (n_informative, + chunks=(n_samples_per_part, ( + n_informative, n_features - - n_informative)), - dtype=dtype) + n_informative) + ), + dtype=dtype) else: # Randomly generate a low rank, fat tail input set - X = make_low_rank_matrix(client=client, - n_samples=n_samples, - n_features=n_features, - effective_rank=effective_rank, - tail_strength=tail_strength, - random_state=rs, - n_parts=n_parts, - dtype=dtype, - order=order) + X = make_low_rank_matrix(client=client, + n_samples=n_samples, + n_features=n_features, + effective_rank=effective_rank, + tail_strength=tail_strength, + random_state=rs, + n_parts=n_parts, + dtype=dtype, + order=order) X = X.rechunk({0: n_samples_per_part, - 1: (n_informative, n_features-n_informative)}) + 1: (n_informative, n_features-n_informative)}) # Generate a ground truth model with only n_informative features being non # zeros 
(the other features are not correlated to y and should be ignored # by a sparsifying regularizers such as L1 or elastic net) if effective_rank and not use_full_low_rank: _, _, coef_ = sg_make_regression(n_samples=n_samples_per_part, - n_features=n_features, - n_informative=n_informative, - n_targets=n_targets, - bias=bias, - effective_rank=effective_rank, - tail_strength=tail_strength, - noise=noise, - shuffle=shuffle, - coef=True, - random_state=random_state, - dtype='double') + n_features=n_features, + n_informative=n_informative, + n_targets=n_targets, + bias=bias, + effective_rank=effective_rank, + tail_strength=tail_strength, + noise=noise, + shuffle=shuffle, + coef=True, + random_state=random_state, + dtype='double') coef_ = cp.array(coef_, dtype=dtype) ground_truth = da.from_array(coef_, chunks=(n_samples_per_part, -1)) y = da.dot(X, ground_truth) + bias else: ground_truth = 100.0 * rs.standard_normal((n_informative, n_targets), - chunks=(n_samples_per_part, -1), - dtype=dtype) + chunks=(n_samples_per_part, + -1), + dtype=dtype) y = da.dot(X[:, :n_informative], ground_truth) + bias X = X.rechunk((None, -1)) - if n_informative != n_features and (effective_rank is None or use_full_low_rank): + if n_informative != n_features and (effective_rank is None + or use_full_low_rank): zeroes = 0.0 * rs.standard_normal((n_features - n_informative, n_targets), dtype=dtype) @@ -355,7 +359,7 @@ def make_regression(n_samples=100, n_features=100, n_informative=10, n_samples_per_part, n_features, features_indices, n_targets, dtype) - + elif order == 'C': samples_indices = np.random.permutation(n_samples) diff --git a/python/cuml/test/dask/test_datasets.py b/python/cuml/test/dask/test_datasets.py index 7da75b8084..6d7a5407f8 100644 --- a/python/cuml/test/dask/test_datasets.py +++ b/python/cuml/test/dask/test_datasets.py @@ -160,7 +160,8 @@ def test_make_regression(n_samples, n_features, n_informative, X_part = c.sync(_extract_partitions, out) out_part = X_part[0][1].result() 
if order == 'F': - if effective_rank is None or (effective_rank and not use_full_low_rank): + if effective_rank is None or (effective_rank + and not use_full_low_rank): assert out_part.flags['F_CONTIGUOUS'] elif order == 'C': assert out_part.flags['C_CONTIGUOUS'] From 3a65c30a3387e53fca2999b2cfc32fc4d341f31e Mon Sep 17 00:00:00 2001 From: wxbn Date: Wed, 15 Apr 2020 12:43:18 +0000 Subject: [PATCH 188/330] Restoring ci/gpu/build.sh file --- ci/gpu/build.sh | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index 4bc15109e4..9125c03b60 100644 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -128,9 +128,7 @@ GTEST_OUTPUT="xml:${WORKSPACE}/test-results/libcuml_cpp/" ./test/ml logger "Python pytest for cuml..." cd $WORKSPACE/python -pytest --durations=0 --cache-clear --junitxml=${WORKSPACE}/junit-cuml.xml -v -s -m "not memleak" cuml/test/test_random_forest.py -pytest --durations=0 --cache-clear --junitxml=${WORKSPACE}/junit-cuml.xml -v -s -m "not memleak" cuml/test/test_random_forest.py -pytest --durations=0 --cache-clear --junitxml=${WORKSPACE}/junit-cuml.xml -v -s -m "not memleak" cuml/test/test_random_forest.py +pytest --cache-clear --junitxml=${WORKSPACE}/junit-cuml.xml -v -s -m "not memleak" ################################################################################ # TEST - Run GoogleTest for ml-prims From a695d46408a941de7e703910fd2cb08dec98b0cd Mon Sep 17 00:00:00 2001 From: Thejaswi Rao Date: Wed, 15 Apr 2020 12:48:20 -0700 Subject: [PATCH 189/330] FEA exposed PatternSetter to cython world for a "with" statement usage --- python/cuml/common/logger.pyx | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/python/cuml/common/logger.pyx b/python/cuml/common/logger.pyx index 7cbeb1fc75..a218035ba0 100644 --- a/python/cuml/common/logger.pyx +++ b/python/cuml/common/logger.pyx @@ -33,6 +33,10 @@ cdef extern from "cuml/common/logger.hpp" namespace "ML" nogil: bool shouldLogFor(int level) const string getPattern() 
const + cdef cppclass PatternSetter: + PatternSetter(const std::String& pattern) + + cdef extern from "cuml/common/logger.hpp" nogil: void CUML_LOG_TRACE(const char* fmt, ...) void CUML_LOG_DEBUG(const char* fmt, ...) From 233101407c0b22344283fc3780b8fdbfedb73039 Mon Sep 17 00:00:00 2001 From: Thejaswi Rao Date: Wed, 15 Apr 2020 13:09:13 -0700 Subject: [PATCH 190/330] ENH created a context-manager for set_pattern in logging --- python/cuml/common/logger.pyx | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/python/cuml/common/logger.pyx b/python/cuml/common/logger.pyx index a218035ba0..4f23fc8199 100644 --- a/python/cuml/common/logger.pyx +++ b/python/cuml/common/logger.pyx @@ -98,6 +98,20 @@ def set_level(level): Logger.get().setLevel(level) +class PatternSetter: + """Internal "context manager" object for restoring previous log pattern""" + + def __init__(self, prev_pattern): + self.prev_pattern = prev_pattern + + def __enter__(self): + pass + + def __exit__(self, a, b, c): + cdef string s = self.prev_pattern + Logger.get().setPattern(s) + + def set_pattern(pattern): """ Set the logging pattern. This setting will be persistent from here onwards @@ -109,16 +123,31 @@ def set_pattern(pattern): .. code-block:: python import cuml.common.logger as logger + + # regular usage of setting a logging pattern for all subsequent logs logger.set_pattern("--> [%H-%M-%S] %v") + # in case one wants to temporarily set the pattern for a code block + with logger.set_pattern("--> [%H-%M-%S] %v") as _: + logger.info("Hello world!") + Parameters ---------- pattern : str Logging pattern string. Refer to this wiki page for its syntax: https://github.com/gabime/spdlog/wiki/3.-Custom-formatting + + Returns + ------- + context_object : PatternSetter + This is useful if one wants to temporarily set a different logging + pattern for a code section, as described in the example section above.
""" + cdef string prev = Logger.get().getPattern() + context_object = PatternSetter(prev.decode("UTF-8")) cdef string s = pattern Logger.get().setPattern(s) + return context_object def should_log_for(level): From 4271d4a7f417742082d2c78f50471d749ca2cc81 Mon Sep 17 00:00:00 2001 From: Thejaswi Rao Date: Wed, 15 Apr 2020 13:09:51 -0700 Subject: [PATCH 191/330] ENH removed the get_pattern method from cython as it is no more needed due to the context-manager object --- python/cuml/common/logger.pyx | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/python/cuml/common/logger.pyx b/python/cuml/common/logger.pyx index 4f23fc8199..611ca6500b 100644 --- a/python/cuml/common/logger.pyx +++ b/python/cuml/common/logger.pyx @@ -174,27 +174,6 @@ def should_log_for(level): return Logger.get().shouldLogFor(level) -def get_pattern(): - """ - Returns the current logging pattern. Useful in case one is temporarily - changing the pattern, like in a method. - - Examples - -------- - - .. code-block:: python - - import cuml.common.logger as logger - def some_func(new_patt): - old_patt = logger.get_pattern() - logger.set_pattern(new_patt) - do_work() - logger.set_pattern(old_patt) - """ - cdef string s = Logger.get().getPattern() - return s.decode("UTF-8") - - def trace(msg): """ Logs a trace message, if it is enabled. 
From 305b0b7572ad2d37ad57fbaf5c290ccef500bb30 Mon Sep 17 00:00:00 2001 From: Thejaswi Rao Date: Wed, 15 Apr 2020 13:10:39 -0700 Subject: [PATCH 192/330] FIX removed the unwanted PatternSetter cdef cppclass declaration --- python/cuml/common/logger.pyx | 3 --- 1 file changed, 3 deletions(-) diff --git a/python/cuml/common/logger.pyx b/python/cuml/common/logger.pyx index 611ca6500b..077d67e295 100644 --- a/python/cuml/common/logger.pyx +++ b/python/cuml/common/logger.pyx @@ -33,9 +33,6 @@ cdef extern from "cuml/common/logger.hpp" namespace "ML" nogil: bool shouldLogFor(int level) const string getPattern() const - cdef cppclass PatternSetter: - PatternSetter(const std::String& pattern) - cdef extern from "cuml/common/logger.hpp" nogil: void CUML_LOG_TRACE(const char* fmt, ...) From 88986d65f7733f7369e7d54313f36d2e1cd683da Mon Sep 17 00:00:00 2001 From: Thejaswi Rao Date: Wed, 15 Apr 2020 13:15:23 -0700 Subject: [PATCH 193/330] FEA added a Logger::getLevel method --- cpp/include/cuml/common/logger.hpp | 7 +++++++ cpp/src/common/logger.cpp | 5 +++++ 2 files changed, 12 insertions(+) diff --git a/cpp/include/cuml/common/logger.hpp b/cpp/include/cuml/common/logger.hpp index 6b6fe302b8..a64b2f264d 100644 --- a/cpp/include/cuml/common/logger.hpp +++ b/cpp/include/cuml/common/logger.hpp @@ -107,6 +107,13 @@ class Logger { */ bool shouldLogFor(int level) const; + /** + * @brief Query for the current log level + * + * @return the current log level + */ + int getLevel() const; + /** * @brief Get the current logging pattern * @return the pattern diff --git a/cpp/src/common/logger.cpp b/cpp/src/common/logger.cpp index 05d605eb81..2a0e173a0c 100644 --- a/cpp/src/common/logger.cpp +++ b/cpp/src/common/logger.cpp @@ -61,6 +61,11 @@ bool Logger::shouldLogFor(int level) const { return logger->should_log(level_e); } +int Logger::getLevel() const { + auto level_e = logger->level(); + return static_cast(level_e); +} + void Logger::log(int level, const char* fmt, ...) 
{ auto level_e = static_cast(level); // explicit check to make sure that we only expand messages when required From bb93665bc9657d6698ab2544794d9983ece64c66 Mon Sep 17 00:00:00 2001 From: Thejaswi Rao Date: Wed, 15 Apr 2020 13:19:17 -0700 Subject: [PATCH 194/330] ENH tests for getLevel and setLevel for Logger --- cpp/test/sg/logger.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/cpp/test/sg/logger.cpp b/cpp/test/sg/logger.cpp index 35677a9f93..e1d9cdcf08 100644 --- a/cpp/test/sg/logger.cpp +++ b/cpp/test/sg/logger.cpp @@ -22,4 +22,9 @@ TEST(Logger, Test) { CUML_LOG_ERROR("This is an error message"); CUML_LOG_WARN("This is a warning message"); CUML_LOG_INFO("This is an info message"); + + Logger::get().setLevel(CUML_LEVEL_WARN); + ASSERT_EQ(CUML_LEVEL_WARN, Logger::get().getLevel()); + Logger::get().setLevel(CUML_LEVEL_INFO); + ASSERT_EQ(CUML_LEVEL_INFO, Logger::get().getLevel()); } From 016a0b40c8f3b1d9daeaac796fadbcd45ae854df Mon Sep 17 00:00:00 2001 From: Thejaswi Rao Date: Wed, 15 Apr 2020 13:24:11 -0700 Subject: [PATCH 195/330] FIX added logger.cpp unit-test to sg suite --- cpp/test/CMakeLists.txt | 1 + cpp/test/sg/logger.cpp | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt index 49234b0f95..e9e325224e 100644 --- a/cpp/test/CMakeLists.txt +++ b/cpp/test/CMakeLists.txt @@ -43,6 +43,7 @@ if(BUILD_CUML_TESTS) sg/kmeans_test.cu sg/knn_test.cu sg/lkf_test.cu + sg/logger.cpp sg/ols.cu sg/pca_test.cu sg/quasi_newton.cu diff --git a/cpp/test/sg/logger.cpp b/cpp/test/sg/logger.cpp index e1d9cdcf08..3777e0c560 100644 --- a/cpp/test/sg/logger.cpp +++ b/cpp/test/sg/logger.cpp @@ -17,6 +17,8 @@ #include #include +namespace ML { + TEST(Logger, Test) { CUML_LOG_CRITICAL("This is a critical message"); CUML_LOG_ERROR("This is an error message"); @@ -28,3 +30,5 @@ TEST(Logger, Test) { Logger::get().setLevel(CUML_LEVEL_INFO); ASSERT_EQ(CUML_LEVEL_INFO, Logger::get().getLevel()); } + +} // namespace ML From 
c76d38aad59c74cfb3b6da66bbeb720126bf9e2d Mon Sep 17 00:00:00 2001 From: Thejaswi Rao Date: Wed, 15 Apr 2020 13:25:29 -0700 Subject: [PATCH 196/330] ENH added getLevel to cython cdef --- python/cuml/common/logger.pyx | 1 + 1 file changed, 1 insertion(+) diff --git a/python/cuml/common/logger.pyx b/python/cuml/common/logger.pyx index 077d67e295..6f34dc6731 100644 --- a/python/cuml/common/logger.pyx +++ b/python/cuml/common/logger.pyx @@ -31,6 +31,7 @@ cdef extern from "cuml/common/logger.hpp" namespace "ML" nogil: void setLevel(int level) void setPattern(const string& pattern) bool shouldLogFor(int level) const + int getLevel() const string getPattern() const From d739c1b170298e288d3d9ff81679af9548f8eaf3 Mon Sep 17 00:00:00 2001 From: Thejaswi Rao Date: Wed, 15 Apr 2020 13:29:21 -0700 Subject: [PATCH 197/330] FEA added a "with" context manager interface for set_level in cython --- python/cuml/common/logger.pyx | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/python/cuml/common/logger.pyx b/python/cuml/common/logger.pyx index 6f34dc6731..4638ca0256 100644 --- a/python/cuml/common/logger.pyx +++ b/python/cuml/common/logger.pyx @@ -74,6 +74,19 @@ LEVEL_CRITICAL = CUML_LEVEL_CRITICAL LEVEL_OFF = CUML_LEVEL_OFF +class LogLevelSetter: + """Internal "context manager" object for restoring previous log level""" + + def __init__(self, prev_log_level): + self.prev_log_level = prev_log_level + + def __enter__(self): + pass + + def __exit__(self, a, b, c): + Logger.get().setLevel(self.prev_log_level) + + def set_level(level): """ Set logging level. This setting will be persistent from here onwards until @@ -93,7 +106,10 @@ def set_level(level): level : int Logging level to be set. 
It must be one of cuml.common.logger.LEVEL_* """ + cdef int prev = Logger.get().getLevel() + context_object = LogLevelSetter(prev) Logger.get().setLevel(level) + return context_object class PatternSetter: From 12c5749851cf4b69fc877fa67f4355c2e7a92831 Mon Sep 17 00:00:00 2001 From: Thejaswi Rao Date: Wed, 15 Apr 2020 13:31:46 -0700 Subject: [PATCH 198/330] DOC updated pydocs for set_level --- python/cuml/common/logger.pyx | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/python/cuml/common/logger.pyx b/python/cuml/common/logger.pyx index 4638ca0256..4c996f4e19 100644 --- a/python/cuml/common/logger.pyx +++ b/python/cuml/common/logger.pyx @@ -97,14 +97,26 @@ def set_level(level): .. code-block:: python - # To enable all log messages upto and including `info()` import cuml.common.logger as logger + + # regular usage of setting a logging level for all subsequent logs + # in this case, it will enable all logs up to and including `info()` logger.set_level(logger.LEVEL_INFO) + # in case one wants to temporarily set the log level for a code block + with logger.set_level(logger.LEVEL_DEBUG) as _: + logger.debug("Hello world!") + Parameters ---------- level : int Logging level to be set. It must be one of cuml.common.logger.LEVEL_* + + Returns + ------- + context_object : LogLevelSetter + This is useful if one wants to temporarily set a different logging + level for a code section, as described in the example section above.
""" cdef int prev = Logger.get().getLevel() context_object = LogLevelSetter(prev) From 87f01be5706f359cc57e747c04f86840ee841d47 Mon Sep 17 00:00:00 2001 From: divyegala Date: Wed, 15 Apr 2020 16:21:45 -0500 Subject: [PATCH 199/330] review comments, explicit transpose --- python/cuml/dask/datasets/regression.py | 69 ++++++++++++++++--------- python/cuml/test/dask/test_datasets.py | 19 +++++-- 2 files changed, 62 insertions(+), 26 deletions(-) diff --git a/python/cuml/dask/datasets/regression.py b/python/cuml/dask/datasets/regression.py index 34b4bf8c16..b0937e9a65 100644 --- a/python/cuml/dask/datasets/regression.py +++ b/python/cuml/dask/datasets/regression.py @@ -52,7 +52,7 @@ def _f_order_standard_normal(nrows, ncols, dtype, seed): return x -def f_order_standard_normal(client, rs, nrows, ncols, chunksizes, dtype): +def f_order_standard_normal(client, rs, chunksizes, ncols, dtype): chunk_seeds = rs.permutation(len(chunksizes)) chunks = [client.submit(_f_order_standard_normal, chunksize, ncols, dtype, chunk_seeds[idx]) @@ -73,9 +73,9 @@ def get_labels(t): return t[1] -def _f_order_shuffle(X, y, n_samples_per_part, seed, features_indices): +def _f_order_shuffle(X, y, n_samples, seed, features_indices): local_rs = cp.random.RandomState(seed=seed) - samples_indices = local_rs.permutation(n_samples_per_part) + samples_indices = local_rs.permutation(n_samples) X[...] = X[samples_indices, :] X[...] 
= X[:, features_indices] @@ -84,7 +84,7 @@ def _f_order_shuffle(X, y, n_samples_per_part, seed, features_indices): return X, y -def f_order_shuffle(client, rs, X, y, n_parts, n_samples_per_part, +def f_order_shuffle(client, rs, X, y, n_parts, chunksizes, n_features, features_indices, n_targets, dtype): X_parts = client.sync(_extract_partitions, X) y_parts = client.sync(_extract_partitions, y) @@ -92,7 +92,7 @@ def f_order_shuffle(client, rs, X, y, n_parts, n_samples_per_part, chunk_seeds = rs.permutation(n_parts) shuffled = [client.submit(_f_order_shuffle, X_part, y_parts[idx][1], - n_samples_per_part, + chunksizes[idx], chunk_seeds[idx], features_indices, workers=[w]) for idx, (w, X_part) in enumerate(X_parts)] @@ -103,20 +103,34 @@ def f_order_shuffle(client, rs, X, y, n_parts, n_samples_per_part, for idx, f in enumerate(shuffled)] X_dela = [da.from_delayed(dask.delayed(Xs), - shape=(n_samples_per_part, n_features), + shape=(chunksizes[idx], n_features), meta=cp.zeros((1)), dtype=dtype) - for Xs in X_shuffled] + for idx, Xs in enumerate(X_shuffled)] y_dela = [da.from_delayed(dask.delayed(ys), - shape=(n_samples_per_part, n_targets), + shape=(chunksizes[idx], n_targets), meta=cp.zeros((1)), dtype=dtype) - for ys in y_shuffled] + for idx, ys in enumerate(y_shuffled)] return da.concatenate(X_dela, axis=0), da.concatenate(y_dela, axis=0) +def convert_C_to_F_order(client, X, chunksizes, n_features, dtype): + X_parts = client.sync(_extract_partitions, X) + X_converted = [client.submit(cp.array, X_part, copy=False, order='F', + workers=[w]) + for idx, (w, X_part) in enumerate(X_parts)] + + X_dela = [da.from_delayed(dask.delayed(Xc), + shape=(chunksizes[idx], n_features), + meta=cp.zeros((1)), + dtype=dtype) + for idx, Xc in enumerate(X_converted)] + return da.concatenate(X_dela, axis=0) + + def make_low_rank_matrix(client=None, n_samples=100, n_features=100, effective_rank=10, tail_strength=0.5, random_state=None, n_parts=1, @@ -283,18 +297,18 @@ def 
make_regression(n_samples=100, n_features=100, n_informative=10, if n_samples_per_part is None: n_samples_per_part = max(1, int(n_samples / n_parts)) + data_chunksizes = [n_samples_per_part] * n_parts + \ + [n_samples % n_samples_per_part] + if (effective_rank is None) or (effective_rank and not use_full_low_rank): # Randomly generate a well conditioned input set if order == 'F': - X = f_order_standard_normal(client, rs, n_samples, n_features, - [n_samples_per_part] * n_parts, dtype) + X = f_order_standard_normal(client, rs, data_chunksizes, + n_features, dtype) + elif order == 'C': X = rs.standard_normal((n_samples, n_features), - chunks=(n_samples_per_part, ( - n_informative, - n_features - - n_informative) - ), + chunks=(n_samples_per_part, -1), dtype=dtype) else: @@ -308,8 +322,11 @@ def make_regression(n_samples=100, n_features=100, n_informative=10, n_parts=n_parts, dtype=dtype, order=order) + if order == 'F': + X = convert_C_to_F_order(client, X, data_chunksizes, + n_features, dtype) X = X.rechunk({0: n_samples_per_part, - 1: (n_informative, n_features-n_informative)}) + 1: -1}) # Generate a ground truth model with only n_informative features being non # zeros (the other features are not correlated to y and should be ignored @@ -327,17 +344,16 @@ def make_regression(n_samples=100, n_features=100, n_informative=10, coef=True, random_state=random_state, dtype='double') - coef_ = cp.array(coef_, dtype=dtype) + coef_ = cp.array(coef_, dtype=dtype, order=order) ground_truth = da.from_array(coef_, chunks=(n_samples_per_part, -1)) y = da.dot(X, ground_truth) + bias else: ground_truth = 100.0 * rs.standard_normal((n_informative, n_targets), - chunks=(n_samples_per_part, - -1), - dtype=dtype) + chunks=(n_samples_per_part, + -1), + dtype=dtype) y = da.dot(X[:, :n_informative], ground_truth) + bias - X = X.rechunk((None, -1)) if n_informative != n_features and (effective_rank is None or use_full_low_rank): @@ -356,7 +372,7 @@ def make_regression(n_samples=100, 
n_features=100, n_informative=10, features_indices = np.random.permutation(n_features) if order == 'F': X, y = f_order_shuffle(client, rs, X, y, n_parts, - n_samples_per_part, + data_chunksizes, n_features, features_indices, n_targets, dtype) @@ -371,6 +387,13 @@ def make_regression(n_samples=100, n_features=100, n_informative=10, y = da.squeeze(y) + if order == 'F' and n_targets > 1: + y = convert_C_to_F_order(client, y, y.chunks[0], n_targets, dtype) + if coef: + ground_truth = convert_C_to_F_order(client, ground_truth, + ground_truth.chunks[0], + n_targets, dtype) + if coef: ground_truth = da.squeeze(ground_truth) return X, y, ground_truth diff --git a/python/cuml/test/dask/test_datasets.py b/python/cuml/test/dask/test_datasets.py index 6d7a5407f8..51046de2e0 100644 --- a/python/cuml/test/dask/test_datasets.py +++ b/python/cuml/test/dask/test_datasets.py @@ -159,12 +159,25 @@ def test_make_regression(n_samples, n_features, n_informative, X_part = c.sync(_extract_partitions, out) out_part = X_part[0][1].result() + + y_part = c.sync(_extract_partitions, values) + value_part = y_part[0][1].result() + + if coef: + coefs_part = c.sync(_extract_partitions, coefs) + coefs_part = coefs_part[0][1].result() if order == 'F': - if effective_rank is None or (effective_rank - and not use_full_low_rank): - assert out_part.flags['F_CONTIGUOUS'] + assert out_part.flags['F_CONTIGUOUS'] + if n_targets > 1: + assert value_part.flags['F_CONTIGUOUS'] + if coef: + assert coefs_part.flags['F_CONTIGUOUS'] elif order == 'C': assert out_part.flags['C_CONTIGUOUS'] + if n_targets > 1: + assert value_part.flags['C_CONTIGUOUS'] + if coef: + assert coefs_part.flags['C_CONTIGUOUS'] finally: c.close() From 185e5518acd842caf00f99ef60c2f876a4446289 Mon Sep 17 00:00:00 2001 From: divyegala Date: Wed, 15 Apr 2020 16:29:02 -0500 Subject: [PATCH 200/330] docstring describing limitations --- python/cuml/dask/datasets/regression.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git 
a/python/cuml/dask/datasets/regression.py b/python/cuml/dask/datasets/regression.py index b0937e9a65..95ebedfe49 100644 --- a/python/cuml/dask/datasets/regression.py +++ b/python/cuml/dask/datasets/regression.py @@ -287,6 +287,21 @@ def make_regression(n_samples=100, n_features=100, n_informative=10, or [n_features, n_targets], optional The coefficient of the underlying linear model. It is returned only if coef is True. + + Known Performance Limitations: + 1. When `effective_rank` is set and `use_full_low_rank` is True, + we cannot generate order `F` by construction, and an explicit + transpose is performed on each part. This may cause memory to spike + (other parameters make order `F` by construction) + 2. When `n_targets > 1` and `order = 'F'` as above, we have to + explicitly transpose the `y` array. If `coef = True`, then we also + explicitly transpose the `ground_truth` array + 3. When `shuffle = True` and `order = F`, there are memory spikes to + shuffle the `F` order arrays + + NOTE: If one runs into Out-Of-Memory errors when any of the above + known-limitations are breached, try increasing the `n_parts` + parameter.
""" client = default_client() if client is None else client From 9e17033b7a0a0b45624c8b476df3a3cf2af78605 Mon Sep 17 00:00:00 2001 From: divyegala Date: Wed, 15 Apr 2020 16:32:30 -0500 Subject: [PATCH 201/330] style fixes --- python/cuml/dask/datasets/regression.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/python/cuml/dask/datasets/regression.py b/python/cuml/dask/datasets/regression.py index 95ebedfe49..06e6bb71a7 100644 --- a/python/cuml/dask/datasets/regression.py +++ b/python/cuml/dask/datasets/regression.py @@ -120,9 +120,9 @@ def f_order_shuffle(client, rs, X, y, n_parts, chunksizes, def convert_C_to_F_order(client, X, chunksizes, n_features, dtype): X_parts = client.sync(_extract_partitions, X) X_converted = [client.submit(cp.array, X_part, copy=False, order='F', - workers=[w]) + workers=[w]) for idx, (w, X_part) in enumerate(X_parts)] - + X_dela = [da.from_delayed(dask.delayed(Xc), shape=(chunksizes[idx], n_features), meta=cp.zeros((1)), @@ -364,9 +364,9 @@ def make_regression(n_samples=100, n_features=100, n_informative=10, y = da.dot(X, ground_truth) + bias else: ground_truth = 100.0 * rs.standard_normal((n_informative, n_targets), - chunks=(n_samples_per_part, - -1), - dtype=dtype) + chunks=(n_samples_per_part, + -1), + dtype=dtype) y = da.dot(X[:, :n_informative], ground_truth) + bias From 492c8798e5e8b4eda424d97496e9f85b897e9cfe Mon Sep 17 00:00:00 2001 From: Simon Andersen Date: Wed, 15 Apr 2020 17:14:50 -0500 Subject: [PATCH 202/330] Tests with cupy input for all features --- python/cuml/preprocessing/encoders.py | 215 +++++++++++++++++------ python/cuml/test/test_one_hot_encoder.py | 157 +++++++++++++---- 2 files changed, 285 insertions(+), 87 deletions(-) diff --git a/python/cuml/preprocessing/encoders.py b/python/cuml/preprocessing/encoders.py index 0db7698071..bd25843dba 100644 --- a/python/cuml/preprocessing/encoders.py +++ b/python/cuml/preprocessing/encoders.py @@ -112,47 +112,141 @@ def 
_check_is_fitted(self): "with appropriate arguments before using this estimator.") raise NotFittedError(msg) + def _take_feature(self, collection, key): + """Helper to handle both df and array as input""" + if self.input_type == 'df': + return collection[key] + else: + return collection[:, key] + def _compute_drop_idx(self): if self.drop is None: return None elif isinstance(self.drop, str) and self.drop == 'first': - return {feature: 0 for feature in self._encoders.keys()} - elif isinstance(self.drop, dict): - if len(self.drop.keys()) != len(self._encoders): + if self.input_type == 'df': + return {feature: 0 for feature in self._encoders.keys()} + else: + return cp.zeros(shape=(len(self._encoders),), dtype=cp.int32) + elif isinstance(self.drop, (dict, list)): + if self.input_type == 'df': + drop_columns = self.drop.keys() + drop_idx = dict() + make_collection, get_size = Series, len + else: + drop_columns = range(len(self.drop)) + drop_idx = cp.empty(shape=(len(drop_columns),), dtype=cp.int32) + make_collection, get_size = cp.array, cp.size + + if len(drop_columns) != len(self._encoders): msg = ("`drop` should have as many columns as the number " "of features ({}), got {}") raise ValueError(msg.format(len(self._encoders), - len(self.drop.keys()))) - drop_idx = dict() - for feature in self.drop.keys(): - self.drop[feature] = Series(self.drop[feature]) - if len(self.drop[feature]) != 1: + len(drop_columns))) + for feature in drop_columns: + drop_feature = make_collection(self.drop[feature]) + if get_size(drop_feature) != 1: msg = ("Trying to drop multiple values for feature {}, " "this is not supported.").format(feature) raise ValueError(msg) cats = self._encoders[feature].classes_ - if not self.drop[feature].isin(cats).all(): + if not self.isin(drop_feature, cats).all(): msg = ("Some categories for feature {} were supposed " "to be dropped, but were not found in the encoder " "categories.".format(feature)) raise ValueError(msg) + idx = self.isin(cats, drop_feature) 
cats = Series(cats) - idx = cats.isin(self.drop[feature]) - drop_idx[feature] = cp.asarray(cats[idx].index) + idx_val = cats[idx].index.values + if self.input_type == 'array': + idx_val = idx_val[0] + drop_idx[feature] = idx_val return drop_idx else: msg = ("Wrong input for parameter `drop`. Expected " - "'first', None or a dict, got {}") + "'first', None, a dict or a list, got {}") raise ValueError(msg.format(type(self.drop))) - def get_categories_(self): + @property + def categories_(self): + """ + Returns categories used for the one hot encoding in the order used by + transform. + """ + return [self._encoders[f].classes_ for f in self._features] + + def _set_input_type(self, X): + if isinstance(X, cp.ndarray): + self.input_type = 'array' + self.isin = cp.isin + elif isinstance(X, DataFrame): + self.input_type = 'df' + self.isin = lambda a, b: Series(a).isin(b) + else: + raise TypeError( + 'Expected input to be cupy.ndarray or cudf.DataFrame, ' + 'got {}'.format(type(X))) + + class _ArrayEncoder: + """Helper for OneHotEncoder. + + This simplified LabelEncoder reflect the same interface + but using cp.arrays instead of cudf.Series internally. """ - Returns categories used for the one hot encoding in the correct order. - This copies the categories to the CPU and should only be used to check - the order of the categories. 
+ def __init__(self, handle_unknown='error'): + self.classes_ = None + self.handle_unknown = handle_unknown + + def fit(self, X): + self.classes_ = cp.unique(X) + return self + + def transform(self, X): + sorted_index = cp.searchsorted(self.classes_, X) + + xindex = cp.take(cp.arange(len(self.classes_)), sorted_index) + mask = self.classes_[xindex] != X + + if mask.any(): + if self.handle_unknown == 'error': + raise KeyError("Attempted to encode unseen key") + else: + xindex[mask] = -1 + + return xindex + + def _fit_encoders(self, X, categories=None): """ - return [self._encoders[f].classes_.to_array() for f in self._features] + Helper to reduce code duplication in fit method + """ + fit_from_categories = categories is not None + _X = categories if fit_from_categories else X + + if self.input_type == 'df': + _encoders = dict() + def append(d, k, v): d[k] = v + Encoder = LabelEncoder + self._features = X.columns + else: + _encoders = list() + def append(l, _, v): l.append(v) + Encoder = self._ArrayEncoder + # used as indices for a list, no need to use a gpu array here + self._features = np.arange(0, _X.shape[1], dtype=cp.int32) + + for feature in self._features: + le = Encoder(handle_unknown=self.handle_unknown) + x_feature = self._take_feature(_X, feature) + append(_encoders, feature, le.fit(x_feature)) + + if fit_from_categories and self.handle_unknown == 'error': + x_categories = x_feature + if not self.isin(self._take_feature(X, feature), + x_categories).all(): + msg = ("Found unknown categories in column {0}" + " during fit".format(feature)) + raise KeyError(msg) + return _encoders def fit(self, X): """ @@ -166,24 +260,16 @@ def fit(self, X): self """ self._validate_keywords() + + self._set_input_type(X) + if type(self.categories) is str and self.categories == 'auto': - self._features = X.columns - self._encoders = { - feature: LabelEncoder(handle_unknown=self.handle_unknown).fit( - X[feature]) - for feature in X.columns - } + self._encoders = 
self._fit_encoders(X) else: - self._features = self.categories.columns - self._encoders = dict() - for feature in self.categories.columns: - le = LabelEncoder(handle_unknown=self.handle_unknown) - self._encoders[feature] = le.fit(self.categories[feature]) - if self.handle_unknown == 'error': - if not X[feature].isin(self.categories[feature]).all(): - msg = ("Found unknown categories in column {0}" - " during fit".format(feature)) - raise KeyError(msg) + _categories = self.categories + if self.input_type == 'array': + _categories = _categories.transpose() # same format as X + self._encoders = self._fit_encoders(X, categories=_categories) self.drop_idx_ = self._compute_drop_idx() self._fitted = True @@ -222,10 +308,13 @@ def transform(self, X): cols, rows = list(), list() j = 0 - for feature in X.columns: + for feature in self._features: encoder = self._encoders[feature] - col_idx = encoder.transform(X[feature]) - col_idx = cp.asarray(col_idx.to_gpu_array(fillna="pandas")) + + col_idx = encoder.transform(self._take_feature(X, feature)) + if self.input_type == 'df': + col_idx = cp.asarray(col_idx.to_gpu_array(fillna="pandas")) + idx_to_keep = col_idx > -1 # increase indices to take previous features into account @@ -278,44 +367,68 @@ def inverse_transform(self, X): """ self._check_is_fitted() if cp.sparse.issparse(X): - # cupy.sparse 7.x does not support argmax, when we upgrade cupy to - # 8.x, we should add a condition in the - # if close: `and cp.sparse.issparsecsc(X)` - # and change the following line by `X = X.tocsc()` X = X.toarray() - result = DataFrame(columns=self._encoders.keys()) + + if self.input_type == 'df': + result = DataFrame(columns=self._features) + def add_result_column(res, key, col): res[key] = col + + def dropped_1cat_inverse(value): + return Series(GenericIndex(value).repeat(X.shape[0])) + + def drop_inverse(enc, drop_index): + return enc.inverse_transform(Series(drop_index))[0] + else: + result = cp.empty(shape=(len(X), len(self._features))) + 
def add_result_column(res, key, col): res[:, key] = col + + def dropped_1cat_inverse(value): + return cp.full(len(X), value.item(), dtype=self.dtype) + + def drop_inverse(enc, drop_index): + return enc.classes_[drop_index] + j = 0 - for feature in self._encoders.keys(): + for feature in self._features: feature_enc = self._encoders[feature] cats = feature_enc.classes_ if self.drop is not None: # Remove dropped categories - dropped_class_idx = Series(self.drop_idx_[feature]) - dropped_class_mask = Series(cats).isin(cats[dropped_class_idx]) + drop_idx = self.drop_idx_[feature] + dropped_class_mask = self.isin(cats, cats[drop_idx]) if len(cats) == 1: - inv = Series(GenericIndex(cats[0]).repeat(X.shape[0])) - result[feature] = inv + # if there is only one category and we drop it, then we + # know that the full inverse column is this category + inv = dropped_1cat_inverse(cats[0]) + add_result_column(result, feature, inv) continue cats = cats[~dropped_class_mask] enc_size = len(cats) x_feature = X[:, j:j + enc_size] idx = cp.argmax(x_feature, axis=1) - inv = Series(cats[idx]).reset_index(drop=True) + inv = cats[idx] + if self.input_type == 'df': + inv = Series(cats[idx]).reset_index(drop=True) if self.handle_unknown == 'ignore': not_null_idx = x_feature.any(axis=1) - inv.iloc[~not_null_idx] = None + if not_null_idx.any(): + if self.input_type == 'array': + raise ValueError('Found an unknown category during ' + 'inverse_transform, which is not ' + 'supported with cupy arrays') + inv[~not_null_idx] = None elif self.drop is not None: # drop will either be None or handle_unknown will be error. 
If # self.drop is not None, then we can safely assume that all of # the nulls in each column are the dropped value dropped_mask = cp.asarray(x_feature.sum(axis=1) == 0).flatten() if dropped_mask.any(): - inv[dropped_mask] = feature_enc.inverse_transform( - Series(self.drop_idx_[feature]))[0] + drop_idx = self.drop_idx_[feature] + inv[dropped_mask] = drop_inverse(feature_enc, drop_idx) - result[feature] = inv + add_result_column(result, feature, inv) j += enc_size return result diff --git a/python/cuml/test/test_one_hot_encoder.py b/python/cuml/test/test_one_hot_encoder.py index dbcb730a41..3ad0900bc9 100644 --- a/python/cuml/test/test_one_hot_encoder.py +++ b/python/cuml/test/test_one_hot_encoder.py @@ -29,23 +29,58 @@ def _from_df_to_array(df): return list(zip(*[df[feature] for feature in df.columns])) +def _from_df_to_cupy(df): + # transform char columns to integer columns + for col in df.columns: + if not np.issubdtype(df[col].dtype, np.number): + df[col] = [ord(c) for c in df[col]] + return cp.array(_from_df_to_array(df)) + + +def _convert_drop(drop): + if drop is None or drop == 'first': + return drop + return [ord(x) if isinstance(x, str) else x for x in drop.values()] + + def _generate_inputs_from_categories(categories=None, - n_samples=10, seed=5060): + n_samples=10, + seed=5060, + as_array=False): if categories is None: - categories = {'strings': ['Foo', 'Bar', 'Baz'], - 'integers': list(range(1000))} + if as_array: + categories = {'strings': list(range(1000, 4000, 3)), + 'integers': list(range(1000))} + else: + categories = {'strings': ['Foo', 'Bar', 'Baz'], + 'integers': list(range(1000))} rd = np.random.RandomState(seed) pandas_df = pd.DataFrame({name: rd.choice(cat, n_samples) for name, cat in categories.items()}) ary = _from_df_to_array(pandas_df) - df = DataFrame.from_pandas(pandas_df) - return df, ary + if as_array: + inp_ary = cp.array(ary) + return inp_ary, ary + else: + df = DataFrame.from_pandas(pandas_df) + return df, ary + + +def 
assert_inverse_equal(ours, ref): + if isinstance(ours, cp.ndarray): + cp.testing.assert_array_equal(ours, ref) + else: + assert_frame_equal(ours.to_pandas(), ref.to_pandas()) -def test_onehot_vs_skonehot(): - X = DataFrame({'gender': ['Male', 'Female', 'Female'], 'int': [1, 3, 2]}) +@pytest.mark.parametrize('as_array', [True, False], ids=['cupy', 'cudf']) +def test_onehot_vs_skonehot(as_array): + X = DataFrame({'gender': ['M', 'F', 'F'], 'int': [1, 3, 2]}) skX = _from_df_to_array(X) + if as_array: + X = _from_df_to_cupy(X) + skX = cp.asnumpy(X) enc = OneHotEncoder(sparse=True) skohe = SkOneHotEncoder(sparse=True) @@ -59,31 +94,42 @@ def test_onehot_vs_skonehot(): @pytest.mark.parametrize('drop', [None, 'first', {'g': 'F', 'i': 3}]) -def test_onehot_inverse_transform(drop): +@pytest.mark.parametrize('as_array', [True, False], ids=['cupy', 'cudf']) +def test_onehot_inverse_transform(drop, as_array): X = DataFrame({'g': ['M', 'F', 'F'], 'i': [1, 3, 2]}) + if as_array: + X = _from_df_to_cupy(X) + drop = _convert_drop(drop) enc = OneHotEncoder(drop=drop) ohe = enc.fit_transform(X) inv = enc.inverse_transform(ohe) - assert_frame_equal(inv.to_pandas(), X.to_pandas()) + assert_inverse_equal(inv, X) -def test_onehot_categories(): +@pytest.mark.parametrize('as_array', [True, False], ids=['cupy', 'cudf']) +def test_onehot_categories(as_array): X = DataFrame({'chars': ['a', 'b'], 'int': [0, 2]}) - enc = OneHotEncoder( - categories=DataFrame({'chars': ['a', 'b', 'c'], 'int': [0, 1, 2]}), - sparse=False - ) + categories = DataFrame({'chars': ['a', 'b', 'c'], 'int': [0, 1, 2]}) + if as_array: + X = _from_df_to_cupy(X) + categories = _from_df_to_cupy(categories).transpose() + + enc = OneHotEncoder(categories=categories, sparse=False) ref = cp.array([[1., 0., 0., 1., 0., 0.], [0., 1., 0., 0., 0., 1.]]) res = enc.fit_transform(X) cp.testing.assert_array_equal(res, ref) -def test_onehot_fit_handle_unknown(): +@pytest.mark.parametrize('as_array', [True, False], ids=['cupy', 'cudf']) 
+def test_onehot_fit_handle_unknown(as_array): X = DataFrame({'chars': ['a', 'b'], 'int': [0, 2]}) Y = DataFrame({'chars': ['c', 'b'], 'int': [0, 2]}) + if as_array: + X = _from_df_to_cupy(X) + Y = _from_df_to_cupy(Y) enc = OneHotEncoder(handle_unknown='error', categories=Y) with pytest.raises(KeyError): @@ -93,9 +139,13 @@ def test_onehot_fit_handle_unknown(): enc.fit(X) -def test_onehot_transform_handle_unknown(): +@pytest.mark.parametrize('as_array', [True, False], ids=['cupy', 'cudf']) +def test_onehot_transform_handle_unknown(as_array): X = DataFrame({'chars': ['a', 'b'], 'int': [0, 2]}) Y = DataFrame({'chars': ['c', 'b'], 'int': [0, 2]}) + if as_array: + X = _from_df_to_cupy(X) + Y = _from_df_to_cupy(Y) enc = OneHotEncoder(handle_unknown='error', sparse=False) enc = enc.fit(X) @@ -110,27 +160,36 @@ def test_onehot_transform_handle_unknown(): cp.testing.assert_array_equal(ohe, ref) -def test_onehot_inverse_transform_handle_unknown(): +@pytest.mark.parametrize('as_array', [True, False], ids=['cupy', 'cudf']) +def test_onehot_inverse_transform_handle_unknown(as_array): X = DataFrame({'chars': ['a', 'b'], 'int': [0, 2]}) Y_ohe = cp.array([[0., 0., 1., 0.], [0., 1., 0., 1.]]) + if as_array: + X = _from_df_to_cupy(X) enc = OneHotEncoder(handle_unknown='ignore') enc = enc.fit(X) - df = enc.inverse_transform(Y_ohe) - ref = DataFrame({'chars': [None, 'b'], 'int': [0, 2]}) - assert_frame_equal(df.to_pandas(), ref.to_pandas()) + if as_array: + with pytest.raises(ValueError): + enc.inverse_transform(Y_ohe) + else: + df = enc.inverse_transform(Y_ohe) + ref = DataFrame({'chars': [None, 'b'], 'int': [0, 2]}) + assert_inverse_equal(df, ref) @pytest.mark.parametrize('drop', [None, 'first']) @pytest.mark.parametrize('sparse', [True, False], ids=['sparse', 'dense']) @pytest.mark.parametrize("n_samples", [10, 1000, 20000, stress_param(250000)]) -def test_onehot_random_inputs(drop, sparse, n_samples): - df, ary = _generate_inputs_from_categories(n_samples=n_samples) 
+@pytest.mark.parametrize('as_array', [True, False], ids=['cupy', 'cudf']) +def test_onehot_random_inputs(drop, sparse, n_samples, as_array): + X, ary = _generate_inputs_from_categories(n_samples=n_samples, + as_array=as_array) enc = OneHotEncoder(sparse=sparse, drop=drop) sk_enc = SkOneHotEncoder(sparse=sparse, drop=drop) - ohe = enc.fit_transform(df) + ohe = enc.fit_transform(X) ref = sk_enc.fit_transform(ary) if sparse: cp.testing.assert_array_equal(ohe.toarray(), ref.toarray()) @@ -139,13 +198,17 @@ def test_onehot_random_inputs(drop, sparse, n_samples): inv_ohe = enc.inverse_transform(ohe) - assert_frame_equal(inv_ohe.to_pandas(), df.to_pandas()) + assert_inverse_equal(inv_ohe, X) -def test_onehot_drop_idx_first(): +@pytest.mark.parametrize('as_array', [True, False], ids=['cupy', 'cudf']) +def test_onehot_drop_idx_first(as_array): X_ary = [['c', 2, 'a'], ['b', 2, 'b']] X = DataFrame({'chars': ['c', 'b'], 'int': [2, 2], 'letters': ['a', 'b']}) + if as_array: + X = _from_df_to_cupy(X) + X_ary = cp.asnumpy(X) enc = OneHotEncoder(sparse=False, drop='first') sk_enc = SkOneHotEncoder(sparse=False, drop='first') @@ -153,18 +216,27 @@ def test_onehot_drop_idx_first(): ref = sk_enc.fit_transform(X_ary) cp.testing.assert_array_equal(ohe, ref) inv = enc.inverse_transform(ohe) - assert_frame_equal(inv.to_pandas(), X.to_pandas()) + assert_inverse_equal(inv, X) -def test_onehot_drop_one_of_each(): +@pytest.mark.parametrize('as_array', [True, False], ids=['cupy', 'cudf']) +def test_onehot_drop_one_of_each(as_array): X = DataFrame({'chars': ['c', 'b'], 'int': [2, 2], 'letters': ['a', 'b']}) drop = dict({'chars': 'b', 'int': 2, 'letters': 'b'}) + X_ary = _from_df_to_array(X) + drop_ary = ['b', 2, 'b'] + if as_array: + X = _from_df_to_cupy(X) + X_ary = cp.asnumpy(X) + drop = drop_ary = _convert_drop(drop) + enc = OneHotEncoder(sparse=False, drop=drop) ohe = enc.fit_transform(X) - ref = SkOneHotEncoder(sparse=False, drop=['b', 2, 'b']).fit_transform(X) + print(ohe.dtype) + ref = 
SkOneHotEncoder(sparse=False, drop=drop_ary).fit_transform(X_ary) cp.testing.assert_array_equal(ohe, ref) inv = enc.inverse_transform(ohe) - assert_frame_equal(inv.to_pandas(), X.to_pandas()) + assert_inverse_equal(inv, X) @pytest.mark.parametrize("drop, pattern", @@ -173,33 +245,46 @@ def test_onehot_drop_one_of_each(): [dict({'chars': 'b', 'int': [2, 0]}), 'Trying to drop multiple values'], [dict({'chars': 'b', 'int': 3}), - 'Some categories [a-zA-Z, ]* were not found'], + 'Some categories [0-9a-zA-Z, ]* were not found'], [DataFrame({'chars': 'b', 'int': 3}), 'Wrong input for parameter `drop`.']]) -def test_onehot_drop_exceptions(drop, pattern): +@pytest.mark.parametrize('as_array', [True, False], ids=['cupy', 'cudf']) +def test_onehot_drop_exceptions(drop, pattern, as_array): X = DataFrame({'chars': ['c', 'b', 'd'], 'int': [2, 1, 0]}) + if as_array: + X = _from_df_to_cupy(X) + drop = _convert_drop(drop) if not isinstance(drop, DataFrame) else drop with pytest.raises(ValueError, match=pattern): OneHotEncoder(sparse=False, drop=drop).fit(X) -def test_onehot_get_categories(): +@pytest.mark.parametrize('as_array', [True, False], ids=['cupy', 'cudf']) +def test_onehot_get_categories(as_array): X = DataFrame({'chars': ['c', 'b', 'd'], 'ints': [2, 1, 0]}) - ref = [np.array(['b', 'c', 'd']), np.array([0, 1, 2])] + if as_array: + X = _from_df_to_cupy(X) + ref[0] = np.array([ord(x) for x in ref[0]]) + enc = OneHotEncoder().fit(X) - cats = enc.get_categories_() + cats = enc.categories_ for i in range(len(ref)): - np.testing.assert_array_equal(ref[i], cats[i]) + cp.testing.assert_array_equal(ref[i], cats[i]) -def test_onehot_sparse_drop(): +@pytest.mark.parametrize('as_array', [True, False], ids=['cupy', 'cudf']) +def test_onehot_sparse_drop(as_array): X = DataFrame({'g': ['M', 'F', 'F'], 'i': [1, 3, 2], 'l': [5, 5, 6]}) drop = {'g': 'F', 'i': 3, 'l': 6} ary = _from_df_to_array(X) drop_ary = ['F', 3, 6] + if as_array: + X = _from_df_to_cupy(X) + ary = cp.asnumpy(X) + drop = 
drop_ary = _convert_drop(drop) enc = OneHotEncoder(sparse=True, drop=drop) sk_enc = SkOneHotEncoder(sparse=True, drop=drop_ary) From c53dab05437fe7c37ba7b0a68a92d7d64152a678 Mon Sep 17 00:00:00 2001 From: Simon Andersen Date: Wed, 15 Apr 2020 17:27:12 -0500 Subject: [PATCH 203/330] Update documentation for input as array --- python/cuml/preprocessing/encoders.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/python/cuml/preprocessing/encoders.py b/python/cuml/preprocessing/encoders.py index bd25843dba..8a2d99ba47 100644 --- a/python/cuml/preprocessing/encoders.py +++ b/python/cuml/preprocessing/encoders.py @@ -40,12 +40,12 @@ class OneHotEncoder: Parameters ---------- - categories : 'auto' or a cudf.DataFrame, default='auto' + categories : 'auto' an cupy.ndarray or a cudf.DataFrame, default='auto' Categories (unique values) per feature: - 'auto' : Determine categories automatically from the training data. - - DataFrame : ``categories[col]`` holds the categories expected in the + - DataFrame/Array : ``categories[col]`` holds the categories expected in the feature col. - drop : 'first', None or a dict, default=None + drop : 'first', None, a dict or a list, default=None Specifies a methodology to use to drop one of the categories per feature. This is useful in situations where perfectly collinear features cause problems, such as when feeding the resulting data @@ -53,7 +53,7 @@ class OneHotEncoder: - None : retain all features (the default). - 'first' : drop the first category in each feature. If only one category is present, the feature will be dropped entirely. - - Dict : ``drop[col]`` is the category in feature col that + - dict/list : ``drop[col]`` is the category in feature col that should be dropped. sparse : bool, default=False This feature was deactivated and will give an exception when True. 
@@ -120,6 +120,7 @@ def _take_feature(self, collection, key): return collection[:, key] def _compute_drop_idx(self): + """Helper to compute indices to drop from category to drop""" if self.drop is None: return None elif isinstance(self.drop, str) and self.drop == 'first': @@ -253,7 +254,7 @@ def fit(self, X): Fit OneHotEncoder to X. Parameters ---------- - X : cuDF.DataFrame + X : cuDF.DataFrame or cupy.ndarray The data to determine the categories of each feature. Returns ------- @@ -282,7 +283,7 @@ def fit_transform(self, X): Parameters ---------- - X : cudf.DataFrame + X : cudf.DataFrame or cupy.ndarray The data to encode. Returns ------- @@ -297,7 +298,7 @@ def transform(self, X): Transform X using one-hot encoding. Parameters ---------- - X : cudf.DataFrame + X : cudf.DataFrame or cupy.ndarray The data to encode. Returns ------- From e14567a1f65a64291d3c817fbc5eb7c1e9b02e04 Mon Sep 17 00:00:00 2001 From: Simon Andersen Date: Wed, 15 Apr 2020 17:42:57 -0500 Subject: [PATCH 204/330] Fix coding style --- python/cuml/preprocessing/encoders.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/cuml/preprocessing/encoders.py b/python/cuml/preprocessing/encoders.py index 8a2d99ba47..c98c76f0e9 100644 --- a/python/cuml/preprocessing/encoders.py +++ b/python/cuml/preprocessing/encoders.py @@ -43,8 +43,8 @@ class OneHotEncoder: categories : 'auto' an cupy.ndarray or a cudf.DataFrame, default='auto' Categories (unique values) per feature: - 'auto' : Determine categories automatically from the training data. - - DataFrame/Array : ``categories[col]`` holds the categories expected in the - feature col. + - DataFrame/Array : ``categories[col]`` holds the categories + expected in the feature col. drop : 'first', None, a dict or a list, default=None Specifies a methodology to use to drop one of the categories per feature. 
This is useful in situations where perfectly collinear From 7b0c7304fea413bbe4ff51694b01329c1c80967a Mon Sep 17 00:00:00 2001 From: Simon Andersen Date: Wed, 15 Apr 2020 17:45:05 -0500 Subject: [PATCH 205/330] Fix whitespaces --- python/cuml/preprocessing/encoders.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cuml/preprocessing/encoders.py b/python/cuml/preprocessing/encoders.py index c98c76f0e9..8a4c7d11ac 100644 --- a/python/cuml/preprocessing/encoders.py +++ b/python/cuml/preprocessing/encoders.py @@ -43,7 +43,7 @@ class OneHotEncoder: categories : 'auto' an cupy.ndarray or a cudf.DataFrame, default='auto' Categories (unique values) per feature: - 'auto' : Determine categories automatically from the training data. - - DataFrame/Array : ``categories[col]`` holds the categories + - DataFrame/Array : ``categories[col]`` holds the categories expected in the feature col. drop : 'first', None, a dict or a list, default=None Specifies a methodology to use to drop one of the categories per From d7c8b0d7f51dd92d7a4d95983a956dac0fc41596 Mon Sep 17 00:00:00 2001 From: "Corey J. 
Nolet" Date: Wed, 15 Apr 2020 19:16:44 -0400 Subject: [PATCH 206/330] Passing along separate sizes for index vs query vertices --- cpp/include/cuml/neighbors/knn.hpp | 6 +-- cpp/src/knn/knn.cu | 12 +++--- cpp/src_prims/selection/knn.h | 39 ++++++++++++------- cpp/test/prims/knn_classify.cu | 2 +- cpp/test/prims/knn_regression.cu | 6 +-- .../cuml/neighbors/kneighbors_classifier.pyx | 8 +++- .../cuml/neighbors/kneighbors_regressor.pyx | 4 +- python/cuml/neighbors/nearest_neighbors.pyx | 2 + python/cuml/test/test_pickle.py | 26 +++++++++---- 9 files changed, 64 insertions(+), 41 deletions(-) diff --git a/cpp/include/cuml/neighbors/knn.hpp b/cpp/include/cuml/neighbors/knn.hpp index e7b999372b..d6854ef62e 100644 --- a/cpp/include/cuml/neighbors/knn.hpp +++ b/cpp/include/cuml/neighbors/knn.hpp @@ -56,7 +56,7 @@ void brute_force_knn(cumlHandle &handle, std::vector &input, * @param k number of nearest neighbors in knn_indices */ void knn_classify(cumlHandle &handle, int *out, int64_t *knn_indices, - std::vector &y, size_t n_samples, int k); + std::vector &y, size_t n_labels, size_t n_samples, int k); /** * @brief Flat C++ API function to perform a knn regression using @@ -72,7 +72,7 @@ void knn_classify(cumlHandle &handle, int *out, int64_t *knn_indices, * @param k number of nearest neighbors in knn_indices */ void knn_regress(cumlHandle &handle, float *out, int64_t *knn_indices, - std::vector &y, size_t n_samples, int k); + std::vector &y, size_t n_labels, size_t n_samples, int k); /** * @brief Flat C++ API function to compute knn class probabilities @@ -89,7 +89,7 @@ void knn_regress(cumlHandle &handle, float *out, int64_t *knn_indices, */ void knn_class_proba(cumlHandle &handle, std::vector &out, int64_t *knn_indices, std::vector &y, - size_t n_samples, int k); + size_t n_labels, size_t n_samples, int k); class kNN { float **ptrs; diff --git a/cpp/src/knn/knn.cu b/cpp/src/knn/knn.cu index c6d43ea29e..ca28bb63bd 100644 --- a/cpp/src/knn/knn.cu +++ b/cpp/src/knn/knn.cu @@ 
-49,7 +49,7 @@ void brute_force_knn(cumlHandle &handle, std::vector &input, } void knn_classify(cumlHandle &handle, int *out, int64_t *knn_indices, - std::vector &y, size_t n_samples, int k) { + std::vector &y, size_t n_labels, size_t n_samples, int k) { auto d_alloc = handle.getDeviceAllocator(); cudaStream_t stream = handle.getStream(); @@ -61,19 +61,19 @@ void knn_classify(cumlHandle &handle, int *out, int64_t *knn_indices, &(n_unique[i]), stream, d_alloc); } - MLCommon::Selection::knn_classify(out, knn_indices, y, n_samples, k, + MLCommon::Selection::knn_classify(out, knn_indices, y, n_labels, n_samples, k, uniq_labels, n_unique, d_alloc, stream); } void knn_regress(cumlHandle &handle, float *out, int64_t *knn_indices, - std::vector &y, size_t n_samples, int k) { - MLCommon::Selection::knn_regress(out, knn_indices, y, n_samples, k, + std::vector &y, size_t n_labels, size_t n_samples, int k) { + MLCommon::Selection::knn_regress(out, knn_indices, y, n_labels, n_samples, k, handle.getStream()); } void knn_class_proba(cumlHandle &handle, std::vector &out, int64_t *knn_indices, std::vector &y, - size_t n_samples, int k) { + size_t n_labels, size_t n_samples, int k) { auto d_alloc = handle.getDeviceAllocator(); cudaStream_t stream = handle.getStream(); @@ -85,7 +85,7 @@ void knn_class_proba(cumlHandle &handle, std::vector &out, &(n_unique[i]), stream, d_alloc); } - MLCommon::Selection::class_probs(out, knn_indices, y, n_samples, k, + MLCommon::Selection::class_probs(out, knn_indices, y, n_labels, n_samples, k, uniq_labels, n_unique, d_alloc, stream); } diff --git a/cpp/src_prims/selection/knn.h b/cpp/src_prims/selection/knn.h index 6bd097e622..1abad1980b 100644 --- a/cpp/src_prims/selection/knn.h +++ b/cpp/src_prims/selection/knn.h @@ -307,6 +307,7 @@ void brute_force_knn(float **input, int *sizes, int n_params, IntType D, template __global__ void class_probs_kernel(OutType *out, const int64_t *knn_indices, const int *labels, int n_uniq_labels, + size_t n_labels, 
size_t n_samples, int n_neighbors) { int row = (blockIdx.x * blockDim.x) + threadIdx.x; int i = row * n_neighbors; @@ -326,7 +327,7 @@ __global__ void class_probs_kernel(OutType *out, const int64_t *knn_indices, template __global__ void class_vote_kernel(OutType *out, const float *class_proba, int *unique_labels, int n_uniq_labels, - size_t n_samples, int n_outputs, + size_t n_labels, size_t n_samples, int n_outputs, int output_offset) { int row = (blockIdx.x * blockDim.x) + threadIdx.x; int i = row * n_uniq_labels; @@ -353,7 +354,8 @@ __global__ void class_vote_kernel(OutType *out, const float *class_proba, template __global__ void regress_avg_kernel(LabelType *out, const int64_t *knn_indices, - const LabelType *labels, size_t n_samples, + const LabelType *labels, size_t n_labels, + size_t n_samples, int n_neighbors, int n_outputs, int output_offset) { int row = (blockIdx.x * blockDim.x) + threadIdx.x; @@ -383,6 +385,7 @@ __global__ void regress_avg_kernel(LabelType *out, const int64_t *knn_indices, * each output in the vector is a different array of labels * corresponding to the i'th output. 
* @param n_rows number of rows in knn_indices + * @param n_labels number of vertices in index * @param k number of neighbors in knn_indices * @param uniq_labels vector of the sorted unique labels for each array in y * @param n_unique vector of sizes for each array in uniq_labels @@ -394,7 +397,7 @@ __global__ void regress_avg_kernel(LabelType *out, const int64_t *knn_indices, */ template void class_probs(std::vector &out, const int64_t *knn_indices, - std::vector &y, size_t n_rows, int k, + std::vector &y, size_t n_labels, size_t n_rows, int k, std::vector &uniq_labels, std::vector &n_unique, std::shared_ptr allocator, cudaStream_t user_stream, cudaStream_t *int_streams = nullptr, @@ -403,8 +406,8 @@ void class_probs(std::vector &out, const int64_t *knn_indices, cudaStream_t stream = select_stream(user_stream, int_streams, n_int_streams, i); - int n_labels = n_unique[i]; - int cur_size = n_rows * n_labels; + int n_unique_labels = n_unique[i]; + int cur_size = n_rows * n_unique_labels; CUDA_CHECK(cudaMemsetAsync(out[i], 0, cur_size * sizeof(float), stream)); @@ -415,13 +418,13 @@ void class_probs(std::vector &out, const int64_t *knn_indices, * Build array of class probability arrays from * knn_indices and labels */ - device_buffer y_normalized(allocator, stream, n_rows); - MLCommon::Label::make_monotonic(y_normalized.data(), y[i], n_rows, stream); + device_buffer y_normalized(allocator, stream, n_labels); + MLCommon::Label::make_monotonic(y_normalized.data(), y[i], n_labels, stream); MLCommon::LinAlg::unaryOp( - y_normalized.data(), y_normalized.data(), n_rows, + y_normalized.data(), y_normalized.data(), n_labels, [] __device__(int input) { return input - 1; }, stream); class_probs_kernel<<>>( - out[i], knn_indices, y_normalized.data(), n_labels, n_rows, k); + out[i], knn_indices, y_normalized.data(), n_unique_labels, n_labels, n_rows, k); CUDA_CHECK(cudaPeekAtLastError()); } } @@ -437,8 +440,9 @@ void class_probs(std::vector &out, const int64_t *knn_indices, * 
@param y vector of label arrays. for multilabel classification, each * element in the vector is a different "output" array of labels corresponding * to the i'th output. + * @param n_labels number of vertices in index * @param n_rows number of rows in knn_indices - * @param k number of neighbors in knn_indices + * * @param k number of neighbors in knn_indices * @param uniq_labels vector of the sorted unique labels for each array in y * @param n_unique vector of sizes for each array in uniq_labels * @param allocator device allocator to use for temporary workspace @@ -449,6 +453,7 @@ void class_probs(std::vector &out, const int64_t *knn_indices, */ template void knn_classify(int *out, const int64_t *knn_indices, std::vector &y, + size_t n_labels, size_t n_rows, int k, std::vector &uniq_labels, std::vector &n_unique, std::shared_ptr &allocator, @@ -477,7 +482,9 @@ void knn_classify(int *out, const int64_t *knn_indices, std::vector &y, * Note: Since class_probs will use the same round robin strategy for distributing * work to the streams, we don't need to explicitly synchronize the streams here. 
*/ - class_probs(probs, knn_indices, y, n_rows, k, uniq_labels, n_unique, + + std::cout << "n_rows classify: " << n_rows << std::endl; + class_probs(probs, knn_indices, y, n_labels, n_rows, k, uniq_labels, n_unique, allocator, user_stream, int_streams, n_int_streams); dim3 grid(MLCommon::ceildiv(n_rows, (size_t)TPB_X), 1, 1); @@ -487,14 +494,14 @@ void knn_classify(int *out, const int64_t *knn_indices, std::vector &y, cudaStream_t stream = select_stream(user_stream, int_streams, n_int_streams, i); - int n_labels = n_unique[i]; + int n_unique_labels = n_unique[i]; /** * Choose max probability */ int smem = sizeof(int) * n_labels; class_vote_kernel<<>>( - out, probs[i], uniq_labels[i], n_labels, n_rows, y.size(), i); + out, probs[i], uniq_labels[i], n_unique_labels, n_labels, n_rows, y.size(), i); CUDA_CHECK(cudaPeekAtLastError()); delete tmp_probs[i]; @@ -511,6 +518,7 @@ void knn_classify(int *out, const int64_t *knn_indices, std::vector &y, * @param y vector of label arrays. for multilabel classification, each * element in the vector is a different "output" array of labels corresponding * to the i'th output. 
+ * @param n_labels number of vertices in index * @param n_rows number of rows in knn_indices * @param k number of neighbors in knn_indices * @param user_stream main stream to use for queuing isolated CUDA events @@ -521,7 +529,8 @@ void knn_classify(int *out, const int64_t *knn_indices, std::vector &y, template void knn_regress(ValType *out, const int64_t *knn_indices, - const std::vector &y, size_t n_rows, int k, + const std::vector &y, + size_t n_labels, size_t n_rows, int k, cudaStream_t user_stream, cudaStream_t *int_streams = nullptr, int n_int_streams = 0) { dim3 grid(MLCommon::ceildiv(n_rows, (size_t)TPB_X), 1, 1); @@ -533,7 +542,7 @@ void knn_regress(ValType *out, const int64_t *knn_indices, for (int i = 0; i < y.size(); i++) { cudaStream_t stream = select_stream(user_stream, int_streams, n_int_streams, i); - regress_avg_kernel<<>>(out, knn_indices, y[i], n_rows, + regress_avg_kernel<<>>(out, knn_indices, y[i], n_labels, n_rows, k, y.size(), i); CUDA_CHECK(cudaPeekAtLastError()); } diff --git a/cpp/test/prims/knn_classify.cu b/cpp/test/prims/knn_classify.cu index 0bb6f087bf..756f0840e8 100644 --- a/cpp/test/prims/knn_classify.cu +++ b/cpp/test/prims/knn_classify.cu @@ -78,7 +78,7 @@ class KNNClassifyTest : public ::testing::TestWithParam { std::vector n_unique; n_unique.push_back(n_classes); - knn_classify(pred_labels, knn_indices, y, params.rows, params.k, + knn_classify(pred_labels, knn_indices, y, params.rows, params.rows, params.k, uniq_labels, n_unique, alloc, stream); CUDA_CHECK(cudaStreamSynchronize(stream)); diff --git a/cpp/test/prims/knn_regression.cu b/cpp/test/prims/knn_regression.cu index 4975489cb3..507d1c05ca 100644 --- a/cpp/test/prims/knn_regression.cu +++ b/cpp/test/prims/knn_regression.cu @@ -27,12 +27,8 @@ #include "linalg/reduce.h" -//#include #include #include -//#include -//#include -//#include namespace MLCommon { namespace Selection { @@ -107,7 +103,7 @@ class KNNRegressionTest : public ::testing::TestWithParam { std::vector y; 
y.push_back(train_labels); - knn_regress(pred_labels, knn_indices, y, params.rows, params.k, stream); + knn_regress(pred_labels, knn_indices, y, params.rows, params.rows, params.k, stream); CUDA_CHECK(cudaStreamSynchronize(stream)); CUDA_CHECK(cudaStreamDestroy(stream)); diff --git a/python/cuml/neighbors/kneighbors_classifier.pyx b/python/cuml/neighbors/kneighbors_classifier.pyx index e3a99f7a9f..db3d27f3b3 100644 --- a/python/cuml/neighbors/kneighbors_classifier.pyx +++ b/python/cuml/neighbors/kneighbors_classifier.pyx @@ -58,6 +58,7 @@ cdef extern from "cuml/neighbors/knn.hpp" namespace "ML": int* out, int64_t *knn_indices, vector[int*] &y, + size_t n_labels, size_t n_samples, int k ) except + @@ -67,6 +68,7 @@ cdef extern from "cuml/neighbors/knn.hpp" namespace "ML": vector[float*] &out, int64_t *knn_indices, vector[int*] &y, + size_t n_labels, size_t n_samples, int k ) except + @@ -248,7 +250,8 @@ class KNeighborsClassifier(NearestNeighbors): classes_ptr, inds_ctype, deref(y_vec), - X.shape[0], + self.n_rows, + n_rows, self.n_neighbors ) @@ -317,7 +320,8 @@ class KNeighborsClassifier(NearestNeighbors): deref(out_vec), inds_ctype, deref(y_vec), - X.shape[0], + self.n_rows, + n_rows, self.n_neighbors ) diff --git a/python/cuml/neighbors/kneighbors_regressor.pyx b/python/cuml/neighbors/kneighbors_regressor.pyx index 4508898ea1..beade1e30c 100644 --- a/python/cuml/neighbors/kneighbors_regressor.pyx +++ b/python/cuml/neighbors/kneighbors_regressor.pyx @@ -68,6 +68,7 @@ cdef extern from "cuml/neighbors/knn.hpp" namespace "ML": float *out, int64_t *knn_indices, vector[float *] &y, + size_t n_rows, size_t n_samples, int k, ) except + @@ -250,7 +251,8 @@ class KNeighborsRegressor(NearestNeighbors): results_ptr, inds_ctype, deref(y_vec), - X.shape[0], + self.n_rows, + n_rows, self.n_neighbors ) diff --git a/python/cuml/neighbors/nearest_neighbors.pyx b/python/cuml/neighbors/nearest_neighbors.pyx index 6121dfbe23..487ed85eb6 100644 --- 
a/python/cuml/neighbors/nearest_neighbors.pyx +++ b/python/cuml/neighbors/nearest_neighbors.pyx @@ -233,6 +233,8 @@ class NearestNeighbors(Base): if convert_dtype else None)) + self.n_rows = n_rows + self.n_indices = 1 return self diff --git a/python/cuml/test/test_pickle.py b/python/cuml/test/test_pickle.py index da95fd2fe2..a7a8ba246c 100644 --- a/python/cuml/test/test_pickle.py +++ b/python/cuml/test/test_pickle.py @@ -105,10 +105,10 @@ def pickle_save_load(tmpdir, func_create_model, func_assert): del model - with open(pickle_file, 'rb') as pf: - cu_after_pickle_model = pickle.load(pf) + # with open(pickle_file, 'rb') as pf: + # cu_after_pickle_model = pickle.load(pf) - func_assert(cu_after_pickle_model, X_test) + # func_assert(cu_after_pickle_model, X_test) def make_classification_dataset(datatype, nrows, ncols, n_info, n_classes): @@ -395,14 +395,24 @@ def create_mod(): model = k_neighbors_models[keys](n_neighbors=k) model.fit(X_train, y_train) result["neighbors"] = model.predict(X_test) + + print(str(model.__dict__)) return model, X_test def assert_model(pickled_model, X_test): - D_after = pickled_model.predict(X_test) - assert array_equal(result["neighbors"], D_after) - state = pickled_model.__dict__ - assert state["n_indices"] == 1 - assert "X_m" in state + + print(str(pickled_model.y.copy_to_host())) + print(str(pickled_model.X_m.copy_to_host())) + print(str(pickled_model.y.shape)) + print(str(pickled_model.X_m.shape)) + + print(str(pickled_model.__dict__)) + + # D_after = pickled_model.predict(X_test) + # assert array_equal(result["neighbors"], D_after) + # state = pickled_model.__dict__ + # assert state["n_indices"] == 1 + # assert "X_m" in state pickle_save_load(tmpdir, create_mod, assert_model) From d012637fd9a34374187cab0ebc8f04bf17af3e18 Mon Sep 17 00:00:00 2001 From: divyegala Date: Wed, 15 Apr 2020 21:15:19 -0500 Subject: [PATCH 207/330] fixed cupy bug and removed centroid bottleneck --- python/cuml/datasets/classification.py | 47 
++++++++++++++++++-------- 1 file changed, 32 insertions(+), 15 deletions(-) diff --git a/python/cuml/datasets/classification.py b/python/cuml/datasets/classification.py index c974a3d2a3..90759d1318 100644 --- a/python/cuml/datasets/classification.py +++ b/python/cuml/datasets/classification.py @@ -13,6 +13,7 @@ # limitations under the License. # +from sklearn.utils.random import sample_without_replacement from cuml.datasets.utils import _create_rs_generator from cuml.utils import with_cupy_rmm @@ -24,10 +25,12 @@ def _generate_hypercube(samples, dimensions, rng): """Returns distinct binary samples of length dimensions """ if dimensions > 30: - return np.hstack([rng.randint(2, size=(samples, dimensions - 30)), + return np.hstack([np.random.randint(2, size=(samples, dimensions - 30)), _generate_hypercube(samples, 30, rng)]) - out = np.random.choice(2 ** dimensions, samples, - replace=False).astype(dtype='>u4', copy=False) + random_state = int(rng.randint(dimensions)) + out = sample_without_replacement(2 ** dimensions, samples, + random_state=random_state).astype( + dtype='>u4', copy=False) out = np.unpackbits(out.view('>u1')).reshape((-1, 32))[:, -dimensions:] return out @@ -118,6 +121,13 @@ def make_classification(n_samples=100, n_features=20, n_informative=2, The order of the generated samples dtype : str, optional (default='float32') Dtype of the generated samples + _centroids: array of centroids of shape (n_clusters, n_informative) + _informative_covariance: array for covariance between informative features + of shape (n_clusters, n_informative, n_informative) + _redundant_covariance: array for covariance between redundant features + of shape (n_informative, n_redundant) + _repeated_indices: array of indices for the repeated features + of shape (n_repeated, ) Returns ------- X : device array of shape [n_samples, n_features] @@ -188,7 +198,12 @@ def make_classification(n_samples=100, n_features=20, n_informative=2, centroids *= generator.rand(n_clusters, 1, 
dtype=dtype) centroids *= generator.rand(1, n_informative, dtype=dtype) - # Initially draw informative features from the standard normal + # Create redundant features + if n_redundant > 0: + if _redundant_covariance is None: + B = 2 * generator.rand(n_informative, n_redundant, dtype=dtype) - 1 + else: + B = _redundant_covariance # Create each cluster; a variant of make_blobs if shuffle: @@ -203,6 +218,7 @@ def make_classification(n_samples=100, n_features=20, n_informative=2, for k, centroid in enumerate(centroids): centroid_indices = cp.where(shuffled_sample_indices == k) y[centroid_indices[0]] = k % n_classes + X_k = X[centroid_indices[0], :n_informative] if _informative_covariance is None: @@ -210,9 +226,15 @@ def make_classification(n_samples=100, n_features=20, n_informative=2, dtype=dtype) - 1 else: A = _informative_covariance[k] - X_k[...] = cp.dot(X_k, A) # introduce random covariance + X_k = cp.dot(X_k, A) + + #NOTE: This could be done outside the loop, but a current + # cupy bug does not allow that + X[centroid_indices[0], n_informative:n_informative + n_redundant] = \ + cp.dot(X_k, B) X_k += centroid # shift the cluster to a vertex + X[centroid_indices[0], :n_informative] = X_k else: stop = 0 for k, centroid in enumerate(centroids): @@ -225,18 +247,13 @@ def make_classification(n_samples=100, n_features=20, n_informative=2, dtype=dtype) - 1 else: A = _informative_covariance[k] - X_k[...] 
= cp.dot(X_k, A) # introduce random covariance + X_k = cp.dot(X_k, A) # introduce random covariance - X_k += centroid # shift the cluster to a vertex + X[start:stop, n_informative:n_informative + n_redundant] = \ + cp.dot(X_k, B) - # Create redundant features - if n_redundant > 0: - if _redundant_covariance is None: - B = 2 * generator.rand(n_informative, n_redundant, dtype=dtype) - 1 - else: - B = _redundant_covariance - X[:, n_informative:n_informative + n_redundant] = \ - cp.dot(X[:, :n_informative], B) + X_k += centroid # shift the cluster to a vertex + X[start:stop, :n_informative] = X_k # Repeat some features if n_repeated > 0: From c23bc2dd42d15401b5b1549bde639af551e0143c Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Wed, 15 Apr 2020 22:32:41 -0400 Subject: [PATCH 208/330] Reverting test_pickle --- python/cuml/test/test_pickle.py | 26 ++++++++------------------ 1 file changed, 8 insertions(+), 18 deletions(-) diff --git a/python/cuml/test/test_pickle.py b/python/cuml/test/test_pickle.py index a7a8ba246c..da95fd2fe2 100644 --- a/python/cuml/test/test_pickle.py +++ b/python/cuml/test/test_pickle.py @@ -105,10 +105,10 @@ def pickle_save_load(tmpdir, func_create_model, func_assert): del model - # with open(pickle_file, 'rb') as pf: - # cu_after_pickle_model = pickle.load(pf) + with open(pickle_file, 'rb') as pf: + cu_after_pickle_model = pickle.load(pf) - # func_assert(cu_after_pickle_model, X_test) + func_assert(cu_after_pickle_model, X_test) def make_classification_dataset(datatype, nrows, ncols, n_info, n_classes): @@ -395,24 +395,14 @@ def create_mod(): model = k_neighbors_models[keys](n_neighbors=k) model.fit(X_train, y_train) result["neighbors"] = model.predict(X_test) - - print(str(model.__dict__)) return model, X_test def assert_model(pickled_model, X_test): - - print(str(pickled_model.y.copy_to_host())) - print(str(pickled_model.X_m.copy_to_host())) - print(str(pickled_model.y.shape)) - print(str(pickled_model.X_m.shape)) - - 
print(str(pickled_model.__dict__)) - - # D_after = pickled_model.predict(X_test) - # assert array_equal(result["neighbors"], D_after) - # state = pickled_model.__dict__ - # assert state["n_indices"] == 1 - # assert "X_m" in state + D_after = pickled_model.predict(X_test) + assert array_equal(result["neighbors"], D_after) + state = pickled_model.__dict__ + assert state["n_indices"] == 1 + assert "X_m" in state pickle_save_load(tmpdir, create_mod, assert_model) From 3251cfba9a715807c1a711b28d4676ea8bf3d173 Mon Sep 17 00:00:00 2001 From: divyegala Date: Wed, 15 Apr 2020 22:46:01 -0500 Subject: [PATCH 209/330] debugging n_parts > 1 --- python/cuml/dask/datasets/regression.py | 28 ++++++++++++++++++------- python/cuml/test/dask/test_datasets.py | 8 +++---- 2 files changed, 23 insertions(+), 13 deletions(-) diff --git a/python/cuml/dask/datasets/regression.py b/python/cuml/dask/datasets/regression.py index 06e6bb71a7..be9e32cd6f 100644 --- a/python/cuml/dask/datasets/regression.py +++ b/python/cuml/dask/datasets/regression.py @@ -53,9 +53,15 @@ def _f_order_standard_normal(nrows, ncols, dtype, seed): def f_order_standard_normal(client, rs, chunksizes, ncols, dtype): + workers = list(client.has_what().keys()) + + n_chunks = len(chunksizes) + chunks_workers = (workers * n_chunks)[:n_chunks] + chunk_seeds = rs.permutation(len(chunksizes)) chunks = [client.submit(_f_order_standard_normal, chunksize, ncols, dtype, - chunk_seeds[idx]) + chunk_seeds[idx], workers=[chunks_workers[idx]], + pure=False) for idx, chunksize in enumerate(chunksizes)] chunks_dela = [da.from_delayed(dask.delayed(chunk), @@ -94,7 +100,7 @@ def f_order_shuffle(client, rs, X, y, n_parts, chunksizes, shuffled = [client.submit(_f_order_shuffle, X_part, y_parts[idx][1], chunksizes[idx], chunk_seeds[idx], features_indices, - workers=[w]) + workers=[w], pure=False) for idx, (w, X_part) in enumerate(X_parts)] X_shuffled = [client.submit(get_X, f, pure=False) @@ -312,8 +318,11 @@ def 
make_regression(n_samples=100, n_features=100, n_informative=10, if n_samples_per_part is None: n_samples_per_part = max(1, int(n_samples / n_parts)) - data_chunksizes = [n_samples_per_part] * n_parts + \ - [n_samples % n_samples_per_part] + data_chunksizes = [n_samples_per_part] * n_parts + if n_samples % n_samples_per_part > 0: + data_chunksizes[-1] += n_samples % n_samples_per_part + + data_chunksizes = tuple(data_chunksizes) if (effective_rank is None) or (effective_rank and not use_full_low_rank): # Randomly generate a well conditioned input set @@ -323,7 +332,7 @@ def make_regression(n_samples=100, n_features=100, n_informative=10, elif order == 'C': X = rs.standard_normal((n_samples, n_features), - chunks=(n_samples_per_part, -1), + chunks=(data_chunksizes, -1), dtype=dtype) else: @@ -337,11 +346,11 @@ def make_regression(n_samples=100, n_features=100, n_informative=10, n_parts=n_parts, dtype=dtype, order=order) + X = X.rechunk({0: data_chunksizes, + 1: -1}) if order == 'F': X = convert_C_to_F_order(client, X, data_chunksizes, n_features, dtype) - X = X.rechunk({0: n_samples_per_part, - 1: -1}) # Generate a ground truth model with only n_informative features being non # zeros (the other features are not correlated to y and should be ignored @@ -376,7 +385,7 @@ def make_regression(n_samples=100, n_features=100, n_informative=10, n_informative, n_targets), dtype=dtype) ground_truth = da.concatenate([ground_truth, zeroes], axis=0) - ground_truth = ground_truth.rechunk(-1) + ground_truth = ground_truth.rechunk(-1) # Add noise if noise > 0.0: @@ -398,6 +407,9 @@ def make_regression(n_samples=100, n_features=100, n_informative=10, y = y[samples_indices, :] X = X[:, features_indices] + + X = X.rechunk((data_chunksizes, -1)) + y = y.rechunk((data_chunksizes, -1)) ground_truth = ground_truth[features_indices, :] y = da.squeeze(y) diff --git a/python/cuml/test/dask/test_datasets.py b/python/cuml/test/dask/test_datasets.py index 51046de2e0..e4d238f8c3 100644 --- 
a/python/cuml/test/dask/test_datasets.py +++ b/python/cuml/test/dask/test_datasets.py @@ -95,15 +95,13 @@ def test_make_blobs(nrows, @pytest.mark.parametrize('noise', [1.0]) @pytest.mark.parametrize('shuffle', [True, False]) @pytest.mark.parametrize('coef', [True, False]) -@pytest.mark.parametrize('random_state', [None, 1234]) -@pytest.mark.parametrize('n_parts', [unit_param(1), - stress_param(3)]) +@pytest.mark.parametrize('n_parts', [1, 4, 23]) @pytest.mark.parametrize('order', ['F', 'C']) @pytest.mark.parametrize('use_full_low_rank', [True, False]) def test_make_regression(n_samples, n_features, n_informative, n_targets, bias, effective_rank, tail_strength, noise, shuffle, - coef, random_state, n_parts, order, + coef, n_parts, order, use_full_low_rank, cluster): c = Client(cluster) try: @@ -114,7 +112,7 @@ def test_make_regression(n_samples, n_features, n_informative, n_targets=n_targets, bias=bias, effective_rank=effective_rank, noise=noise, shuffle=shuffle, coef=coef, - random_state=random_state, n_parts=n_parts, + n_parts=n_parts, use_full_low_rank=use_full_low_rank, order=order) From 8bebb700f7e22b7d1818e618e06cb85fb99ed7f2 Mon Sep 17 00:00:00 2001 From: divyegala Date: Wed, 15 Apr 2020 22:49:36 -0500 Subject: [PATCH 210/330] style fix --- python/cuml/dask/datasets/regression.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cuml/dask/datasets/regression.py b/python/cuml/dask/datasets/regression.py index be9e32cd6f..cc2121207c 100644 --- a/python/cuml/dask/datasets/regression.py +++ b/python/cuml/dask/datasets/regression.py @@ -321,7 +321,7 @@ def make_regression(n_samples=100, n_features=100, n_informative=10, data_chunksizes = [n_samples_per_part] * n_parts if n_samples % n_samples_per_part > 0: data_chunksizes[-1] += n_samples % n_samples_per_part - + data_chunksizes = tuple(data_chunksizes) if (effective_rank is None) or (effective_rank and not use_full_low_rank): From 9b92d97cb706a21a942d4a7789b02b4fcb69f941 Mon Sep 17 00:00:00 
2001 From: divyegala Date: Wed, 15 Apr 2020 23:22:43 -0500 Subject: [PATCH 211/330] working dask make classification --- python/cuml/dask/datasets/classification.py | 53 ++++++++++++--------- python/cuml/test/dask/test_datasets.py | 20 ++++++-- 2 files changed, 47 insertions(+), 26 deletions(-) diff --git a/python/cuml/dask/datasets/classification.py b/python/cuml/dask/datasets/classification.py index e57c9330d5..8caee2152a 100644 --- a/python/cuml/dask/datasets/classification.py +++ b/python/cuml/dask/datasets/classification.py @@ -36,8 +36,9 @@ def get_labels(t): return t[1] -def _create_covariance(*args, rs, dtype='float32'): - return 2 * rs.rand(*args, dtype=dtype) - 1 +def _create_covariance(dims, seed, dtype='float32'): + local_rs = cp.random.RandomState(seed=seed) + return 2 * local_rs.rand(*dims, dtype=dtype) - 1 @with_cupy_rmm @@ -159,17 +160,27 @@ def make_classification(n_samples=100, n_features=20, n_informative=2, rs)).astype(dtype, copy=False) # # create covariance matrices - informative_covariance_local = rs.rand(n_clusters, n_informative, - n_informative, dtype=dtype) - informative_covariance = client.scatter(informative_covariance_local, - workers=workers) - del informative_covariance_local - - redundant_covariance_local = rs.rand(n_informative, n_redundant, - dtype=dtype) - redundant_covariance = client.scatter(redundant_covariance_local, - workers=workers) - del redundant_covariance_local + # informative_covariance_local = rs.rand(n_clusters, n_informative, + # n_informative, dtype=dtype) + # informative_covariance = client.scatter(informative_covariance_local, + # workers=workers) + + # redundant_covariance_local = rs.rand(n_informative, n_redundant, + # dtype=dtype) + # redundant_covariance = client.scatter(redundant_covariance_local, + # workers=workers) + covariance_seeds = rs.randint(n_features, size=2) + informative_covariance = client.submit(_create_covariance, + (n_clusters, n_informative, + n_informative), + int(covariance_seeds[0]), + 
pure=False) + + redundant_covariance = client.submit(_create_covariance, + (n_informative, + n_redundant), + int(covariance_seeds[1]), + pure=False) # repeated indices n = n_informative + n_redundant @@ -184,7 +195,6 @@ def make_classification(n_samples=100, n_features=20, n_informative=2, scale = 1 + 100 * rs.rand(n_features, dtype=dtype) # Create arrays on each worker (gpu) - parts = [] worker_rows = [] rows_so_far = 0 for idx, worker in enumerate(parts_workers): @@ -194,13 +204,15 @@ def make_classification(n_samples=100, n_features=20, n_informative=2, else: worker_rows.append((int(n_samples) - rows_so_far)) + part_seeds = rs.permutation(n_parts) parts = [client.submit(sg_make_classification, worker_rows[i], n_features, n_informative, n_redundant, n_repeated, n_classes, n_clusters_per_class, weights, flip_y, class_sep, - hypercube, shift, scale, shuffle, random_state, - order, dtype, centroids, informative_covariance, - redundant_covariance, repeated_indices, - pure=False, workers=[parts_workers[i]]) + hypercube, shift, scale, shuffle, + int(part_seeds[i]), order, dtype, centroids, + informative_covariance, redundant_covariance, + repeated_indices, pure=False, + workers=[parts_workers[i]]) for i in range(len(parts_workers))] X_parts = [client.submit(get_X, f, pure=False) @@ -217,7 +229,4 @@ def make_classification(n_samples=100, n_features=20, n_informative=2, meta=cp.zeros((1))) for idx, yp in enumerate(y_parts)] - X = da.concatenate([Xd for Xd in X_dela], axis=0) - y = da.concatenate([yd for yd in y_dela], axis=0) - - return X, y + return da.concatenate(X_dela, axis=0), da.concatenate(y_dela, axis=0) diff --git a/python/cuml/test/dask/test_datasets.py b/python/cuml/test/dask/test_datasets.py index ab52923102..8adf019f0a 100644 --- a/python/cuml/test/dask/test_datasets.py +++ b/python/cuml/test/dask/test_datasets.py @@ -26,6 +26,8 @@ from cuml.test.utils import unit_param, quality_param, stress_param +from cuml.dask.common.part_utils import _extract_partitions 
+ @pytest.mark.parametrize('nrows', [unit_param(1e3), quality_param(1e5), stress_param(1e6)]) @@ -180,13 +182,23 @@ def test_make_classification(n_samples, n_features, n_classes, assert(len(X.chunks[0])) == n_parts assert(len(X.chunks[1])) == 1 - X_local = X.compute() - y_local = y.compute() + assert X.shape == (n_samples, n_features) + assert y.shape == (n_samples, ) + + assert len(X.chunks[0]) == n_parts + assert len(y.chunks[0]) == n_parts - assert X_local.shape == (n_samples, n_features) import cupy as cp + y_local = y.compute() assert len(cp.unique(y_local)) == n_classes - assert y_local.shape == (n_samples, ) + + X_parts = client.sync(_extract_partitions, X) + X_first = X_parts[0][1].result() + + if order == 'F': + assert X_first.flags['F_CONTIGUOUS'] + elif order == 'C': + assert X_first.flags['C_CONTIGUOUS'] finally: client.close() From 24f66d3d1a0f38ffc314d27882aa8b16b1bdca30 Mon Sep 17 00:00:00 2001 From: divyegala Date: Wed, 15 Apr 2020 23:26:41 -0500 Subject: [PATCH 212/330] style fixes --- python/cuml/dask/datasets/classification.py | 12 ++++++------ python/cuml/datasets/classification.py | 11 ++++++----- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/python/cuml/dask/datasets/classification.py b/python/cuml/dask/datasets/classification.py index 8caee2152a..e1765afb05 100644 --- a/python/cuml/dask/datasets/classification.py +++ b/python/cuml/dask/datasets/classification.py @@ -170,17 +170,17 @@ def make_classification(n_samples=100, n_features=20, n_informative=2, # redundant_covariance = client.scatter(redundant_covariance_local, # workers=workers) covariance_seeds = rs.randint(n_features, size=2) - informative_covariance = client.submit(_create_covariance, + informative_covariance = client.submit(_create_covariance, (n_clusters, n_informative, n_informative), int(covariance_seeds[0]), pure=False) - redundant_covariance = client.submit(_create_covariance, - (n_informative, - n_redundant), - int(covariance_seeds[1]), - pure=False) + 
redundant_covariance = client.submit(_create_covariance, + (n_informative, + n_redundant), + int(covariance_seeds[1]), + pure=False) # repeated indices n = n_informative + n_redundant diff --git a/python/cuml/datasets/classification.py b/python/cuml/datasets/classification.py index 90759d1318..141f5eef0a 100644 --- a/python/cuml/datasets/classification.py +++ b/python/cuml/datasets/classification.py @@ -25,7 +25,8 @@ def _generate_hypercube(samples, dimensions, rng): """Returns distinct binary samples of length dimensions """ if dimensions > 30: - return np.hstack([np.random.randint(2, size=(samples, dimensions - 30)), + return np.hstack([np.random.randint(2, size=(samples, + dimensions - 30)), _generate_hypercube(samples, 30, rng)]) random_state = int(rng.randint(dimensions)) out = sample_without_replacement(2 ** dimensions, samples, @@ -127,7 +128,7 @@ def make_classification(n_samples=100, n_features=20, n_informative=2, _redundant_covariance: array for covariance between redundant features of shape (n_informative, n_redundant) _repeated_indices: array of indices for the repeated features - of shape (n_repeated, ) + of shape (n_repeated, ) Returns ------- X : device array of shape [n_samples, n_features] @@ -228,10 +229,10 @@ def make_classification(n_samples=100, n_features=20, n_informative=2, A = _informative_covariance[k] X_k = cp.dot(X_k, A) - #NOTE: This could be done outside the loop, but a current + # NOTE: This could be done outside the loop, but a current # cupy bug does not allow that - X[centroid_indices[0], n_informative:n_informative + n_redundant] = \ - cp.dot(X_k, B) + X[centroid_indices[0], n_informative:n_informative + + n_redundant] = cp.dot(X_k, B) X_k += centroid # shift the cluster to a vertex X[centroid_indices[0], :n_informative] = X_k From 27cbae01f6a722200382b6180c0646912a86ae1e Mon Sep 17 00:00:00 2001 From: Thejaswi Rao Date: Wed, 15 Apr 2020 22:30:33 -0700 Subject: [PATCH 213/330] ENH added unit-tests for logger --- 
python/cuml/test/test_logger.py | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 python/cuml/test/test_logger.py diff --git a/python/cuml/test/test_logger.py b/python/cuml/test/test_logger.py new file mode 100644 index 0000000000..b6c15b9023 --- /dev/null +++ b/python/cuml/test/test_logger.py @@ -0,0 +1,32 @@ +# +# Copyright (c) 2020, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import pytest + +from cuml.common.logger import logger + + +def test_logger(): + logger.trace("This is a trace message") + logger.debug("This is a debug message") + logger.info("This is a info message") + logger.warn("This is a warn message") + logger.error("This is a error message") + logger.critical("This is a critical message") + + with logger.set_level(logger.LOG_LEVEL_WARN): + assert(logger.should_log_for(logger.LOG_LEVEL_WARN)) + assert(not logger.should_log_for(logger.LOG_LEVEL_INFO)) From 1d68383c6c6c70108a755d681a85dc5d5b74ba33 Mon Sep 17 00:00:00 2001 From: Thejaswi Rao Date: Wed, 15 Apr 2020 22:42:37 -0700 Subject: [PATCH 214/330] ENH test for set_pattern --- python/cuml/test/test_logger.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/python/cuml/test/test_logger.py b/python/cuml/test/test_logger.py index b6c15b9023..07237073af 100644 --- a/python/cuml/test/test_logger.py +++ b/python/cuml/test/test_logger.py @@ -22,7 +22,7 @@ def test_logger(): logger.trace("This is a trace message") 
logger.debug("This is a debug message") - logger.info("This is a info message") + logger.info("This is an info message") logger.warn("This is a warn message") logger.error("This is a error message") logger.critical("This is a critical message") @@ -30,3 +30,6 @@ def test_logger(): with logger.set_level(logger.LOG_LEVEL_WARN): assert(logger.should_log_for(logger.LOG_LEVEL_WARN)) assert(not logger.should_log_for(logger.LOG_LEVEL_INFO)) + + with logger.set_pattern("%v"): + logger.info("This is an info message") From 65edf3bb0e5768926a22fe3981c21aaabfc2b608 Mon Sep 17 00:00:00 2001 From: Thejaswi Rao Date: Wed, 15 Apr 2020 22:48:45 -0700 Subject: [PATCH 215/330] DOC pep8 style fixes --- python/cuml/test/test_logger.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/python/cuml/test/test_logger.py b/python/cuml/test/test_logger.py index 07237073af..85ba4e8a56 100644 --- a/python/cuml/test/test_logger.py +++ b/python/cuml/test/test_logger.py @@ -14,8 +14,6 @@ # limitations under the License. # -import pytest - from cuml.common.logger import logger From c445daa65f7233bb5782b03ab0e8222da8a8faf8 Mon Sep 17 00:00:00 2001 From: Thejaswi Rao Date: Thu, 16 Apr 2020 00:51:18 -0700 Subject: [PATCH 216/330] FIX proper import statement for logger --- python/cuml/test/test_logger.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cuml/test/test_logger.py b/python/cuml/test/test_logger.py index 85ba4e8a56..85cc16fd4a 100644 --- a/python/cuml/test/test_logger.py +++ b/python/cuml/test/test_logger.py @@ -14,7 +14,7 @@ # limitations under the License. 
# -from cuml.common.logger import logger +import cuml.common.logger as logger def test_logger(): From 99af2ad163cadf43e103375c75b266ec9c9f45e4 Mon Sep 17 00:00:00 2001 From: Levs Dolgovs Date: Thu, 16 Apr 2020 00:53:05 -0700 Subject: [PATCH 217/330] re-added previously replaced tests --- cpp/test/sg/fil_test.cu | 182 +++++++++++++++++++++------------------- 1 file changed, 98 insertions(+), 84 deletions(-) diff --git a/cpp/test/sg/fil_test.cu b/cpp/test/sg/fil_test.cu index a1a412740c..f7427ef629 100644 --- a/cpp/test/sg/fil_test.cu +++ b/cpp/test/sg/fil_test.cu @@ -485,7 +485,8 @@ class TreeliteFilTest : public BaseFilTest { break; case fil::leaf_value_t::INT_CLASS_LABEL: std::vector vec(ps.num_classes); - for (int i = 0; i < ps.num_classes; ++i) vec[i] = i == output.idx ? 1.0f : 0.0f; + for (int i = 0; i < ps.num_classes; ++i) + vec[i] = i == output.idx ? 1.0f : 0.0f; TL_CPP_CHECK(builder->SetLeafVectorNode(key, vec)); } } else { @@ -588,19 +589,19 @@ class TreeliteAutoFilTest : public TreeliteFilTest { }; // rows, cols, nan_prob, depth, num_trees, leaf_prob, output, threshold, -// global_bias, algo, seed, tolerance +// global_bias, algo, seed, tolerance, branch comparison operator, FIL implementation, number of classes std::vector predict_dense_inputs = { {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator(0), fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0, fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator(0), - fil::leaf_value_t::INT_CLASS_LABEL, 5}, + fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0, fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator(0), fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator(0), - fil::leaf_value_t::INT_CLASS_LABEL, 7}, + fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, 
fil::output_t::SIGMOID, 0, 0, fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator(0), fil::leaf_value_t::FLOAT_SCALAR, 0}, @@ -631,30 +632,22 @@ std::vector predict_dense_inputs = { fil::output_t(fil::output_t::AVG | fil::output_t::CLASS), 0, 0, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator(0), fil::leaf_value_t::FLOAT_SCALAR, 0}, - {20000, 50, 0.05, 8, 50, 0.05, - fil::output_t(fil::output_t::AVG | fil::output_t::CLASS), 0, 0, - fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator(0), - fil::leaf_value_t::INT_CLASS_LABEL, 2}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::CLASS), 0, 0, fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator(0), fil::leaf_value_t::FLOAT_SCALAR, 0}, - {20000, 50, 0.05, 8, 50, 0.05, - fil::output_t(fil::output_t::AVG | fil::output_t::CLASS), 0, 0, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator(0), - fil::leaf_value_t::INT_CLASS_LABEL, 5}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::CLASS), 0, 0, fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator(0), fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0.5, fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator(0), - fil::leaf_value_t::INT_CLASS_LABEL, 4}, + fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0.5, fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator(0), fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0.5, fil::algo_t::NAIVE, - 42, 2e-3f, tl::Operator(0), fil::leaf_value_t::INT_CLASS_LABEL, 4}, + 42, 2e-3f, tl::Operator(0), fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::CLASS), 1.0, 0.5, fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator(0), @@ -662,6 +655,25 @@ std::vector predict_dense_inputs = { {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator(0), 
fil::leaf_value_t::FLOAT_SCALAR, 0}, + {20000, 50, 0.05, 8, 50, 0.05, + fil::output_t(fil::output_t::AVG | fil::output_t::CLASS), 0, 0, + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator(0), + fil::leaf_value_t::INT_CLASS_LABEL, 5}, + {20000, 50, 0.05, 8, 50, 0.05, + fil::output_t(fil::output_t::AVG | fil::output_t::CLASS), 0, 0, + fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator(0), + fil::leaf_value_t::INT_CLASS_LABEL, 2}, + {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0, + fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator(0), + fil::leaf_value_t::INT_CLASS_LABEL, 5}, + {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, + fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator(0), + fil::leaf_value_t::INT_CLASS_LABEL, 7}, + {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0.5, + fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator(0), + fil::leaf_value_t::INT_CLASS_LABEL, 4}, + {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0.5, fil::algo_t::NAIVE, + 42, 2e-3f, tl::Operator(0), fil::leaf_value_t::INT_CLASS_LABEL, 4}, }; TEST_P(PredictDenseFilTest, Predict) { compare(); } @@ -670,38 +682,30 @@ INSTANTIATE_TEST_CASE_P(FilTests, PredictDenseFilTest, testing::ValuesIn(predict_dense_inputs)); // rows, cols, nan_prob, depth, num_trees, leaf_prob, output, threshold, -// global_bias, algo, seed, tolerance +// global_bias, algo, seed, tolerance, branch comparison operator, FIL implementation, number of classes std::vector predict_sparse_inputs = { {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0, fil::algo_t::NAIVE, - 42, 2e-3f, tl::Operator(0), fil::leaf_value_t::INT_CLASS_LABEL, 3}, + 42, 2e-3f, tl::Operator(0), fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator(0), - fil::leaf_value_t::INT_CLASS_LABEL, 3}, + fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::SIGMOID | fil::output_t::CLASS), 0, 0, 
fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator(0), fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator(0), fil::leaf_value_t::FLOAT_SCALAR, 0}, - {20000, 50, 0.05, 8, 50, 0.05, - fil::output_t(fil::output_t::AVG | fil::output_t::CLASS), 0, 0.5, - fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator(0), - fil::leaf_value_t::INT_CLASS_LABEL, 2}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::CLASS), 0, 0.5, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator(0), fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0.5, fil::algo_t::NAIVE, - 42, 2e-3f, tl::Operator(0), fil::leaf_value_t::INT_CLASS_LABEL, 6}, + 42, 2e-3f, tl::Operator(0), fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0.5, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator(0), fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0.5, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator(0), fil::leaf_value_t::FLOAT_SCALAR, 0}, - {20000, 50, 0.05, 8, 50, 0.05, - fil::output_t(fil::output_t::AVG | fil::output_t::CLASS), 1.0, 0.5, - fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator(0), - fil::leaf_value_t::INT_CLASS_LABEL, 10}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::CLASS), 1.0, 0.5, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator(0), @@ -710,6 +714,21 @@ std::vector predict_sparse_inputs = { fil::output_t(fil::output_t::SIGMOID | fil::output_t::CLASS), 0, 0, fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator(0), fil::leaf_value_t::FLOAT_SCALAR, 0}, + {20000, 50, 0.05, 8, 50, 0.05, + fil::output_t(fil::output_t::AVG | fil::output_t::CLASS), 1.0, 0.5, + fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator(0), + fil::leaf_value_t::INT_CLASS_LABEL, 10}, + {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0.5, fil::algo_t::NAIVE, + 42, 2e-3f, tl::Operator(0), 
fil::leaf_value_t::INT_CLASS_LABEL, 6}, + {20000, 50, 0.05, 8, 50, 0.05, + fil::output_t(fil::output_t::AVG | fil::output_t::CLASS), 0, 0.5, + fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator(0), + fil::leaf_value_t::INT_CLASS_LABEL, 2}, + {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, + fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator(0), + fil::leaf_value_t::INT_CLASS_LABEL, 3}, + {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0, fil::algo_t::NAIVE, + 42, 2e-3f, tl::Operator(0), fil::leaf_value_t::INT_CLASS_LABEL, 3}, }; TEST_P(PredictSparseFilTest, Predict) { compare(); } @@ -718,12 +737,10 @@ INSTANTIATE_TEST_CASE_P(FilTests, PredictSparseFilTest, testing::ValuesIn(predict_sparse_inputs)); // rows, cols, nan_prob, depth, num_trees, leaf_prob, output, threshold, -// global_bias, algo, seed, tolerance +// global_bias, algo, seed, tolerance, branch comparison operator, FIL implementation, number of classes std::vector import_dense_inputs = { {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::FLOAT_SCALAR, 0}, - {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::NAIVE, - 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::INT_CLASS_LABEL, 6}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kLE, fil::leaf_value_t::FLOAT_SCALAR, 0}, @@ -737,23 +754,12 @@ std::vector import_dense_inputs = { fil::output_t(fil::output_t::AVG | fil::output_t::CLASS), 0, 0, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::FLOAT_SCALAR, 0}, - {20000, 50, 0.05, 8, 50, 0.05, - fil::output_t(fil::output_t::AVG | fil::output_t::CLASS), 0, 0, - fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kLT, - fil::leaf_value_t::INT_CLASS_LABEL, 2}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0, fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kLE, fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, 
fil::output_t::SIGMOID, 0, 0, fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kGT, fil::leaf_value_t::FLOAT_SCALAR, 0}, - {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, - fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kLE, - fil::leaf_value_t::INT_CLASS_LABEL, 7}, - {20000, 50, 0.05, 8, 50, 0.05, - fil::output_t(fil::output_t::SIGMOID | fil::output_t::AVG), 0, 0, - fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kGT, - fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::SIGMOID | fil::output_t::CLASS), 0, 0, fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kGE, @@ -761,10 +767,6 @@ std::vector import_dense_inputs = { {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::FLOAT_SCALAR, 0}, - {20000, 50, 0.05, 8, 50, 0.05, - fil::output_t(fil::output_t::AVG | fil::output_t::CLASS), 0, 0, - fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kLE, - fil::leaf_value_t::INT_CLASS_LABEL, 5}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::CLASS), 0, 0, fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kLE, @@ -775,7 +777,7 @@ std::vector import_dense_inputs = { {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::FLOAT_SCALAR, 0}, - {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, + {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0, fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLE, fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, @@ -784,16 +786,6 @@ std::vector import_dense_inputs = { {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0, fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kGT, fil::leaf_value_t::FLOAT_SCALAR, 0}, - {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, 
tl::Operator::kLE, - fil::leaf_value_t::INT_CLASS_LABEL, 5}, - {20000, 50, 0.05, 8, 50, 0.05, - fil::output_t(fil::output_t::SIGMOID | fil::output_t::AVG), 0, 0, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLE, - fil::leaf_value_t::INT_CLASS_LABEL, 3}, - {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kGT, - fil::leaf_value_t::INT_CLASS_LABEL, 6}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kGT, fil::leaf_value_t::FLOAT_SCALAR, 0}, @@ -803,10 +795,6 @@ std::vector import_dense_inputs = { {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kGE, fil::leaf_value_t::FLOAT_SCALAR, 0}, - {20000, 50, 0.05, 8, 50, 0.05, - fil::output_t(fil::output_t::SIGMOID | fil::output_t::AVG), 0, 0, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kGE, - fil::leaf_value_t::INT_CLASS_LABEL, 5}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::SIGMOID | fil::output_t::CLASS), 0, 0, fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLT, @@ -835,10 +823,6 @@ std::vector import_dense_inputs = { {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0.5, fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLE, fil::leaf_value_t::FLOAT_SCALAR, 0}, - {20000, 50, 0.05, 8, 50, 0.05, - fil::output_t(fil::output_t::SIGMOID | fil::output_t::AVG), 0, 0.5, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLE, - fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0.5, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kGT, fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, @@ -848,6 +832,36 @@ std::vector import_dense_inputs = { {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator::kLE, fil::leaf_value_t::FLOAT_SCALAR, 0}, + {20000, 50, 
0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, + fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator::kLE, + fil::leaf_value_t::FLOAT_SCALAR, 0}, + {20000, 50, 0.05, 8, 50, 0.05, + fil::output_t(fil::output_t::SIGMOID | fil::output_t::AVG), 0, 0, + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kGE, + fil::leaf_value_t::INT_CLASS_LABEL, 5}, + {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kGT, + fil::leaf_value_t::INT_CLASS_LABEL, 6}, + {20000, 50, 0.05, 8, 50, 0.05, + fil::output_t(fil::output_t::SIGMOID | fil::output_t::AVG), 0, 0, + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLE, + fil::leaf_value_t::INT_CLASS_LABEL, 3}, + {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLE, + fil::leaf_value_t::INT_CLASS_LABEL, 5}, + {20000, 50, 0.05, 8, 50, 0.05, + fil::output_t(fil::output_t::AVG | fil::output_t::CLASS), 0, 0, + fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kLE, + fil::leaf_value_t::INT_CLASS_LABEL, 5}, + {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, + fil::algo_t::TREE_REORG, 42, 2e-3f, tl::Operator::kLE, + fil::leaf_value_t::INT_CLASS_LABEL, 7}, + {20000, 50, 0.05, 8, 50, 0.05, + fil::output_t(fil::output_t::AVG | fil::output_t::CLASS), 0, 0, + fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kLT, + fil::leaf_value_t::INT_CLASS_LABEL, 2}, + {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::NAIVE, + 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::INT_CLASS_LABEL, 6}, }; TEST_P(TreeliteDenseFilTest, Import) { compare(); } @@ -856,17 +870,13 @@ INSTANTIATE_TEST_CASE_P(FilTests, TreeliteDenseFilTest, testing::ValuesIn(import_dense_inputs)); // rows, cols, nan_prob, depth, num_trees, leaf_prob, output, threshold, -// global_bias, algo, seed, tolerance +// global_bias, algo, seed, tolerance, branch comparison operator, FIL implementation, number of classes std::vector 
import_sparse_inputs = { {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kLE, fil::leaf_value_t::FLOAT_SCALAR, 0}, - {20000, 50, 0.05, 8, 50, 0.05, - fil::output_t(fil::output_t::SIGMOID | fil::output_t::AVG), 0, 0, - fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kLE, - fil::leaf_value_t::INT_CLASS_LABEL, 5}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::SIGMOID | fil::output_t::CLASS), 0, 0, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kGT, @@ -877,14 +887,8 @@ std::vector import_sparse_inputs = { fil::output_t(fil::output_t::AVG | fil::output_t::CLASS), 0, 0, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::FLOAT_SCALAR, 0}, - {20000, 50, 0.05, 8, 50, 0.05, - fil::output_t(fil::output_t::AVG | fil::output_t::CLASS), 0, 0, - fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kLT, - fil::leaf_value_t::INT_CLASS_LABEL, 2}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0.5, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::FLOAT_SCALAR, 0}, - {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0.5, fil::algo_t::NAIVE, - 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::INT_CLASS_LABEL, 3}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::SIGMOID, 0, 0.5, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kLE, fil::leaf_value_t::FLOAT_SCALAR, 0}, @@ -894,6 +898,9 @@ std::vector import_sparse_inputs = { fil::output_t(fil::output_t::AVG | fil::output_t::CLASS), 1.0, 0.5, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kGE, fil::leaf_value_t::FLOAT_SCALAR, 0}, + {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0, + fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator::kLT, + fil::leaf_value_t::FLOAT_SCALAR, 0}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::CLASS), 1.0, 0.5, fil::algo_t::NAIVE, 
42, 2e-3f, tl::Operator::kGE, @@ -901,9 +908,16 @@ std::vector import_sparse_inputs = { {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0, fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::INT_CLASS_LABEL, 4}, - {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0, - fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator::kLT, - fil::leaf_value_t::FLOAT_SCALAR, 0}, + {20000, 50, 0.05, 8, 50, 0.05, + fil::output_t(fil::output_t::SIGMOID | fil::output_t::AVG), 0, 0, + fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kLE, + fil::leaf_value_t::INT_CLASS_LABEL, 5}, + {20000, 50, 0.05, 8, 50, 0.05, + fil::output_t(fil::output_t::AVG | fil::output_t::CLASS), 0, 0, + fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator::kLT, + fil::leaf_value_t::INT_CLASS_LABEL, 2}, + {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::AVG, 0, 0.5, fil::algo_t::NAIVE, + 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::INT_CLASS_LABEL, 3}, }; TEST_P(TreeliteSparseFilTest, Import) { compare(); } @@ -912,11 +926,8 @@ INSTANTIATE_TEST_CASE_P(FilTests, TreeliteSparseFilTest, testing::ValuesIn(import_sparse_inputs)); // rows, cols, nan_prob, depth, num_trees, leaf_prob, output, threshold, -// global_bias, algo, seed, tolerance +// global_bias, algo, seed, tolerance, branch comparison operator, FIL implementation, number of classes std::vector import_auto_inputs = { - {20000, 50, 0.05, 10, 50, 0.05, fil::output_t::AVG, 0, 0, - fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator::kLT, - fil::leaf_value_t::INT_CLASS_LABEL, 3}, {20000, 50, 0.05, 10, 50, 0.05, fil::output_t::RAW, 0, 0, fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::FLOAT_SCALAR, 0}, @@ -926,12 +937,15 @@ std::vector import_auto_inputs = { {20000, 50, 0.05, 19, 50, 0.05, fil::output_t::RAW, 0, 0, fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::FLOAT_SCALAR, 0}, - {20000, 50, 0.05, 19, 50, 0.05, fil::output_t::AVG, 0, 0, - fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLT, 
- fil::leaf_value_t::INT_CLASS_LABEL, 6}, {20000, 50, 0.05, 19, 50, 0.05, fil::output_t::RAW, 0, 0, fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLT, fil::leaf_value_t::FLOAT_SCALAR, 0}, + {20000, 50, 0.05, 10, 50, 0.05, fil::output_t::AVG, 0, 0, + fil::algo_t::ALGO_AUTO, 42, 2e-3f, tl::Operator::kLT, + fil::leaf_value_t::INT_CLASS_LABEL, 3}, + {20000, 50, 0.05, 19, 50, 0.05, fil::output_t::AVG, 0, 0, + fil::algo_t::BATCH_TREE_REORG, 42, 2e-3f, tl::Operator::kLT, + fil::leaf_value_t::INT_CLASS_LABEL, 6}, }; TEST_P(TreeliteAutoFilTest, Import) { compare(); } From ce04e0e4645eb2d93e1674dc97c1272ca0fe74e7 Mon Sep 17 00:00:00 2001 From: Levs Dolgovs Date: Thu, 16 Apr 2020 01:33:12 -0700 Subject: [PATCH 218/330] addressed most review comments; new regression logic still buggy --- cpp/include/cuml/fil/fil.h | 2 +- cpp/src/fil/fil.cu | 84 ++++++++++++++++++++++++-------------- cpp/src/fil/infer.cu | 7 ++-- cpp/test/sg/fil_test.cu | 5 +-- 4 files changed, 59 insertions(+), 39 deletions(-) diff --git a/cpp/include/cuml/fil/fil.h b/cpp/include/cuml/fil/fil.h index e391cdb134..7642375255 100644 --- a/cpp/include/cuml/fil/fil.h +++ b/cpp/include/cuml/fil/fil.h @@ -172,7 +172,7 @@ struct forest_params_t { algo_t algo; // output is the desired output type output_t output; - // threshold is used to for classification if ((output & OUTPUT_CLASS) != 0), + // threshold is used to for classification if leaf_payload_type == FLOAT_SCALAR && (output & OUTPUT_CLASS) != 0 && !predict_proba, // and is ignored otherwise float threshold; // global_bias is added to the sum of tree predictions diff --git a/cpp/src/fil/fil.cu b/cpp/src/fil/fil.cu index 3c993ae21f..575329eb09 100644 --- a/cpp/src/fil/fil.cu +++ b/cpp/src/fil/fil.cu @@ -137,37 +137,66 @@ struct forest { params.num_classes = num_classes_; params.leaf_payload_type = leaf_payload_type_; + /** + The binary classification / regression (FLOAT_SCALAR) predict_proba() works as follows + (always 2 outputs): + RAW: output the 
sum of tree predictions + AVG is set: divide by the number of trees (averaging) + SIGMOID is set: apply sigmoid + CLASS is set: ignored + write the output of the previous stages and its complement + + The binary classification / regression (FLOAT_SCALAR) predict() works as follows + (always 1 output): + RAW (no values set): output the sum of tree predictions + AVG is set: divide by the number of trees (averaging) + SIGMOID is set: apply sigmoid + CLASS is set: apply threshold (equivalent to choosing best class) + + The multi-class classification / regression (INT_CLASS_LABEL) predict_proba() works as follows + (always num_classes outputs): + RAW (no values set): output class votes + AVG is set: divide by the number of trees (averaging, output class probability) + SIGMOID is set: apply sigmoid + CLASS is set: ignored + + The multi-class classification / regression (INT_CLASS_LABEL) predict() works as follows + (always 1 output): + RAW (no values set): output the label of the class with highest probability, else output label 0. 
+ AVG is set: ignored + SIGMOID is set: ignored + CLASS is set: ignored + */ output_t ot = output_; - if (predict_proba) { - ot = output_t(ot & ~output_t::CLASS); // no threshold on probabilities + bool complement_proba = false, do_transform = global_bias_ != 0.0f; + + if (leaf_payload_type_ == leaf_value_t::FLOAT_SCALAR) { + if (predict_proba) { + params.num_outputs = 2; + ot = output_t(ot & ~output_t::CLASS); // no threshold on probabilities + complement_proba = true; + do_transform = true; + } else { + params.num_outputs = 1; + if (ot != output_t::RAW) do_transform = true; + } + } else if (leaf_payload_type_ == leaf_value_t::INT_CLASS_LABEL) { + if (predict_proba) { + params.num_outputs = num_classes_; + ot = output_t(ot & ~output_t::CLASS); // no threshold on probabilities + if (ot != output_t::RAW) do_transform = true; + } else { + params.num_outputs = 1; + // moot since choosing best class and all transforms are monotonic + // also, would break current code + do_transform = false; + } } - bool complement_proba = ((ot & output_t::CLASS) == 0) && - (leaf_payload_type_ == leaf_value_t::FLOAT_SCALAR); - /** FLOAT_SCALAR means inference produces 1 class score/component and - transform_k might complement to 2 for classification, - if class probabilities are being requested. - assuming predict(..., predict_proba=true) will not get called - for regression, hence predict_params::num_outputs == 2 */ - params.num_outputs = - ((ot & output_t::CLASS) != 0) - ? 1 - : (leaf_payload_type_ == leaf_value_t::INT_CLASS_LABEL ? num_classes_ - : 2); // Predict using the forest. cudaStream_t stream = h.getStream(); infer(params, stream); - // Transform the output if necessary. 
- bool do_transform = - ot != output_t::RAW || global_bias_ != 0.0f || complement_proba; - if ((leaf_payload_type_ == leaf_value_t::INT_CLASS_LABEL) && - ((ot & output_t::CLASS) != 0)) { - // moot since choosing best class and all transforms are monotonic - // also, would break current code - do_transform = false; - } - if (do_transform) { size_t num_values_to_transform = (size_t)num_rows * (size_t)params.num_outputs; @@ -328,10 +357,6 @@ void check_params(const forest_params_t* params, bool dense) { ASSERT(false, "output should be a combination of RAW, AVG, SIGMOID and CLASS"); } - ASSERT( - (params->output & output_t::CLASS || params->num_classes < 2 || - params->leaf_payload_type) == leaf_value_t::INT_CLASS_LABEL, - "cannot do two-component regression using FLOAT_SCALAR leaf_payload_type"); } // tl_node_at is a checked version of tree[i] @@ -529,8 +554,7 @@ size_t tl_leaf_vector_size(const tl::Model& model) { const tl::Tree& tree = model.trees[0]; int node_key; for (node_key = tree_root(tree); !tl_node_at(tree, node_key).is_leaf(); - node_key = tl_node_at(tree, node_key).cright()) - ; + node_key = tl_node_at(tree, node_key).cright()); const tl::Tree::Node& node = tl_node_at(tree, node_key); if (node.has_leaf_vector()) return node.leaf_vector().size(); return 0; diff --git a/cpp/src/fil/infer.cu b/cpp/src/fil/infer.cu index 60ba185976..bd73167786 100644 --- a/cpp/src/fil/infer.cu +++ b/cpp/src/fil/infer.cu @@ -160,8 +160,6 @@ struct tree_aggregator_t { } }; -#undef BlockReduce_ - template struct tree_aggregator_t { // could switch to unsigned short to save shared memory @@ -282,10 +280,11 @@ void infer_k_launcher(storage_type forest, predict_params params, params.max_items = params.algo == algo_t::BATCH_TREE_REORG ? MAX_BATCH_ITEMS : 1; + /** searching for the most items per block while respecting the shared + * memory limits creates a full linear programming problem. 
+ * solving it in a single equation looks less tractable than this */ int num_items = 0; size_t shm_sz = 0; - // solving this linear programming problem in a single equation - // looks less tractable than this for (int nitems = 1; nitems <= params.max_items; ++nitems) { size_t peak_footprint; switch (nitems) { diff --git a/cpp/test/sg/fil_test.cu b/cpp/test/sg/fil_test.cu index f7427ef629..5d540d0ae1 100644 --- a/cpp/test/sg/fil_test.cu +++ b/cpp/test/sg/fil_test.cu @@ -65,10 +65,7 @@ struct FilTestParams { int num_classes; size_t num_proba_outputs() { return num_rows * std::max(num_classes, 2); } - size_t num_preds_outputs() { - return ((output & fil::output_t::CLASS) != 0) ? num_rows - : num_proba_outputs(); - } + size_t num_preds_outputs() { return num_rows; } }; std::string output2str(fil::output_t output) { From 970d74719b3110bed025e63dfe8600aaed09e8f7 Mon Sep 17 00:00:00 2001 From: wxbn Date: Thu, 16 Apr 2020 08:43:04 +0000 Subject: [PATCH 219/330] Requested modifications (except fixtures) --- python/cuml/test/test_random_forest.py | 82 ++++++++++---------------- 1 file changed, 32 insertions(+), 50 deletions(-) diff --git a/python/cuml/test/test_random_forest.py b/python/cuml/test/test_random_forest.py index 14df74e4c7..3cf0dd26c4 100644 --- a/python/cuml/test/test_random_forest.py +++ b/python/cuml/test/test_random_forest.py @@ -151,47 +151,6 @@ def test_rf_regression(datatype, split_algo, mode, column_info, assert fil_r2 >= (cu_r2 - 0.02) -@pytest.mark.parametrize('datatype', [np.float32]) -@pytest.mark.parametrize('column_info', [unit_param([20, 10]), - quality_param([200, 100]), - stress_param([500, 350])]) -@pytest.mark.parametrize('nrows', [unit_param(500), quality_param(5000), - stress_param(500000)]) -def test_rf_classification_default(datatype, column_info, nrows): - - ncols, n_info = column_info - X, y = make_classification(n_samples=nrows, n_features=ncols, - n_clusters_per_class=1, n_informative=n_info, - random_state=0, n_classes=2) - X = 
X.astype(datatype) - y = y.astype(np.int32) - X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8, - random_state=0) - # Initialize, fit and predict using cuML's - # random forest classification model - cuml_model = curfc() - cuml_model.fit(X_train, y_train) - fil_preds = cuml_model.predict(X_test, predict_model="GPU") - cu_preds = cuml_model.predict(X_test, predict_model="CPU") - fil_preds = np.reshape(fil_preds, np.shape(cu_preds)) - - fil_acc = accuracy_score(y_test, fil_preds) - cu_acc = accuracy_score(y_test, cu_preds) - - score_acc = cuml_model.score(X_test, y_test) - assert cu_acc == pytest.approx(score_acc) - - # sklearn random forest classification model - # initialization, fit and predict - if nrows < 500000: - sk_model = skrfc(max_depth=16, random_state=10) - sk_model.fit(X_train, y_train) - sk_preds = sk_model.predict(X_test) - sk_acc = accuracy_score(y_test, sk_preds) - assert fil_acc >= (sk_acc - 0.07) - assert fil_acc >= (cu_acc - 0.02) - - @pytest.mark.parametrize('datatype', [np.float32]) @pytest.mark.parametrize('column_info', [unit_param([20, 10]), quality_param([200, 100]), @@ -258,6 +217,29 @@ def test_rf_classification_seed(datatype, column_info, nrows): y = y.astype(np.int32) X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8, random_state=0) + + cuml_model = curfc() + cuml_model.fit(X_train, y_train) + fil_preds = cuml_model.predict(X_test, predict_model="GPU") + cu_preds = cuml_model.predict(X_test, predict_model="CPU") + fil_preds = np.reshape(fil_preds, np.shape(cu_preds)) + + fil_acc = accuracy_score(y_test, fil_preds) + cu_acc = accuracy_score(y_test, cu_preds) + + score_acc = cuml_model.score(X_test, y_test) + assert cu_acc == pytest.approx(score_acc) + + # sklearn random forest classification model + # initialization, fit and predict + if nrows < 500000: + sk_model = skrfc(max_depth=16, random_state=10) + sk_model.fit(X_train, y_train) + sk_preds = sk_model.predict(X_test) + sk_acc = 
accuracy_score(y_test, sk_preds) + assert fil_acc >= (sk_acc - 0.07) + assert fil_acc >= (cu_acc - 0.02) + for i in range(8): seed = random.randint(100, 1e5) # Initialize, fit and predict using cuML's @@ -351,7 +333,7 @@ def test_rf_classification_float64(datatype, column_info, @pytest.mark.parametrize('column_info', [unit_param([20, 10]), quality_param([200, 100]), stress_param([500, 350])]) -@pytest.mark.parametrize('nrows', [unit_param(3000), quality_param(25000), +@pytest.mark.parametrize('nrows', [unit_param(1000), quality_param(25000), stress_param(500000)]) def test_rf_regression_float64(datatype, column_info, nrows): ncols, n_info = column_info @@ -402,7 +384,7 @@ def test_rf_regression_float64(datatype, column_info, nrows): stress_param([500, 350])]) @pytest.mark.parametrize('nrows', [unit_param(500), quality_param(5000), stress_param(500000)]) -@pytest.mark.parametrize('n_classes', [5, 10]) +@pytest.mark.parametrize('n_classes', [10]) @pytest.mark.parametrize('type', ['dataframe', 'numpy']) def test_rf_classification_multi_class(datatype, column_info, nrows, n_classes, type): @@ -548,7 +530,7 @@ def test_rf_regression_sparse(datatype, mode, column_info, X, y = fetch_california_housing(return_X_y=True) else: - X, y = make_regression(n_samples=3000, n_features=ncols, + X, y = make_regression(n_samples=1000, n_features=ncols, n_informative=n_info, random_state=123) X = X.astype(datatype) @@ -617,8 +599,9 @@ def test_rf_regression_sparse(datatype, mode, column_info, stress_param([500, 350])]) @pytest.mark.parametrize('nrows', [unit_param(800), quality_param(50000), stress_param(500000)]) -def test_rf_memory_leakage(fil_sparse_format, column_info, nrows): - n_iter = 3 +@pytest.mark.parametrize('n_iter', [unit_param(5), quality_param(30), + stress_param(80)]) +def test_rf_memory_leakage(fil_sparse_format, column_info, nrows, n_iter): datatype = np.float32 use_handle = True ncols, n_info = column_info @@ -648,7 +631,7 @@ def test_for_memory_leak(): delta_mem = 
free_mem - cuda.current_context().get_memory_info()[0] assert delta_mem == 0 - for i in range(3): + for i in range(2): cuml_mods.predict(X_test, predict_model="GPU", fil_sparse_format=fil_sparse_format) handle.sync() # just to be sure @@ -751,9 +734,8 @@ def test_multiple_fits_regression(column_info, nrows, n_estimators, n_bins): @pytest.mark.parametrize('rows_sample', [unit_param(1.0), stress_param(0.95)]) @pytest.mark.parametrize('datatype', [np.float32]) -@pytest.mark.parametrize('split_algo', [0, 1]) @pytest.mark.parametrize('max_features', [1.0, 'auto', 'log2', 'sqrt']) -def test_rf_classification_proba(datatype, split_algo, rows_sample, nrows, +def test_rf_classification_proba(datatype, rows_sample, nrows, column_info, max_features): use_handle = True ncols, n_info = column_info @@ -771,7 +753,7 @@ def test_rf_classification_proba(datatype, split_algo, rows_sample, nrows, # Initialize, fit and predict using cuML's # random forest classification model cuml_model = curfc(max_features=max_features, rows_sample=rows_sample, - n_bins=16, split_algo=split_algo, split_criterion=0, + n_bins=16, split_criterion=0, min_rows_per_node=2, seed=123, n_streams=1, n_estimators=40, handle=handle, max_leaves=-1, max_depth=16) From 9711a28ba536a11ddc67021ac4d4d7eb43c253ff Mon Sep 17 00:00:00 2001 From: "Corey J. 
Nolet" Date: Thu, 16 Apr 2020 09:03:39 -0400 Subject: [PATCH 220/330] Updating c++ style --- cpp/include/cuml/neighbors/knn.hpp | 6 +++-- cpp/src/knn/knn.cu | 6 +++-- cpp/src_prims/selection/knn.h | 38 +++++++++++++++--------------- 3 files changed, 27 insertions(+), 23 deletions(-) diff --git a/cpp/include/cuml/neighbors/knn.hpp b/cpp/include/cuml/neighbors/knn.hpp index d6854ef62e..d6cb2d6599 100644 --- a/cpp/include/cuml/neighbors/knn.hpp +++ b/cpp/include/cuml/neighbors/knn.hpp @@ -56,7 +56,8 @@ void brute_force_knn(cumlHandle &handle, std::vector &input, * @param k number of nearest neighbors in knn_indices */ void knn_classify(cumlHandle &handle, int *out, int64_t *knn_indices, - std::vector &y, size_t n_labels, size_t n_samples, int k); + std::vector &y, size_t n_labels, size_t n_samples, + int k); /** * @brief Flat C++ API function to perform a knn regression using @@ -72,7 +73,8 @@ void knn_classify(cumlHandle &handle, int *out, int64_t *knn_indices, * @param k number of nearest neighbors in knn_indices */ void knn_regress(cumlHandle &handle, float *out, int64_t *knn_indices, - std::vector &y, size_t n_labels, size_t n_samples, int k); + std::vector &y, size_t n_labels, size_t n_samples, + int k); /** * @brief Flat C++ API function to compute knn class probabilities diff --git a/cpp/src/knn/knn.cu b/cpp/src/knn/knn.cu index ca28bb63bd..7acf2f8576 100644 --- a/cpp/src/knn/knn.cu +++ b/cpp/src/knn/knn.cu @@ -49,7 +49,8 @@ void brute_force_knn(cumlHandle &handle, std::vector &input, } void knn_classify(cumlHandle &handle, int *out, int64_t *knn_indices, - std::vector &y, size_t n_labels, size_t n_samples, int k) { + std::vector &y, size_t n_labels, size_t n_samples, + int k) { auto d_alloc = handle.getDeviceAllocator(); cudaStream_t stream = handle.getStream(); @@ -66,7 +67,8 @@ void knn_classify(cumlHandle &handle, int *out, int64_t *knn_indices, } void knn_regress(cumlHandle &handle, float *out, int64_t *knn_indices, - std::vector &y, size_t n_labels, 
size_t n_samples, int k) { + std::vector &y, size_t n_labels, size_t n_samples, + int k) { MLCommon::Selection::knn_regress(out, knn_indices, y, n_labels, n_samples, k, handle.getStream()); } diff --git a/cpp/src_prims/selection/knn.h b/cpp/src_prims/selection/knn.h index 1abad1980b..89605a95f3 100644 --- a/cpp/src_prims/selection/knn.h +++ b/cpp/src_prims/selection/knn.h @@ -307,8 +307,8 @@ void brute_force_knn(float **input, int *sizes, int n_params, IntType D, template __global__ void class_probs_kernel(OutType *out, const int64_t *knn_indices, const int *labels, int n_uniq_labels, - size_t n_labels, - size_t n_samples, int n_neighbors) { + size_t n_labels, size_t n_samples, + int n_neighbors) { int row = (blockIdx.x * blockDim.x) + threadIdx.x; int i = row * n_neighbors; @@ -327,8 +327,8 @@ __global__ void class_probs_kernel(OutType *out, const int64_t *knn_indices, template __global__ void class_vote_kernel(OutType *out, const float *class_proba, int *unique_labels, int n_uniq_labels, - size_t n_labels, size_t n_samples, int n_outputs, - int output_offset) { + size_t n_labels, size_t n_samples, + int n_outputs, int output_offset) { int row = (blockIdx.x * blockDim.x) + threadIdx.x; int i = row * n_uniq_labels; @@ -355,9 +355,8 @@ __global__ void class_vote_kernel(OutType *out, const float *class_proba, template __global__ void regress_avg_kernel(LabelType *out, const int64_t *knn_indices, const LabelType *labels, size_t n_labels, - size_t n_samples, - int n_neighbors, int n_outputs, - int output_offset) { + size_t n_samples, int n_neighbors, + int n_outputs, int output_offset) { int row = (blockIdx.x * blockDim.x) + threadIdx.x; int i = row * n_neighbors; @@ -419,12 +418,14 @@ void class_probs(std::vector &out, const int64_t *knn_indices, * knn_indices and labels */ device_buffer y_normalized(allocator, stream, n_labels); - MLCommon::Label::make_monotonic(y_normalized.data(), y[i], n_labels, stream); + MLCommon::Label::make_monotonic(y_normalized.data(), y[i], 
n_labels, + stream); MLCommon::LinAlg::unaryOp( y_normalized.data(), y_normalized.data(), n_labels, [] __device__(int input) { return input - 1; }, stream); class_probs_kernel<<>>( - out[i], knn_indices, y_normalized.data(), n_unique_labels, n_labels, n_rows, k); + out[i], knn_indices, y_normalized.data(), n_unique_labels, n_labels, + n_rows, k); CUDA_CHECK(cudaPeekAtLastError()); } } @@ -453,9 +454,8 @@ void class_probs(std::vector &out, const int64_t *knn_indices, */ template void knn_classify(int *out, const int64_t *knn_indices, std::vector &y, - size_t n_labels, - size_t n_rows, int k, std::vector &uniq_labels, - std::vector &n_unique, + size_t n_labels, size_t n_rows, int k, + std::vector &uniq_labels, std::vector &n_unique, std::shared_ptr &allocator, cudaStream_t user_stream, cudaStream_t *int_streams = nullptr, int n_int_streams = 0) { @@ -501,7 +501,8 @@ void knn_classify(int *out, const int64_t *knn_indices, std::vector &y, */ int smem = sizeof(int) * n_labels; class_vote_kernel<<>>( - out, probs[i], uniq_labels[i], n_unique_labels, n_labels, n_rows, y.size(), i); + out, probs[i], uniq_labels[i], n_unique_labels, n_labels, n_rows, + y.size(), i); CUDA_CHECK(cudaPeekAtLastError()); delete tmp_probs[i]; @@ -529,10 +530,9 @@ void knn_classify(int *out, const int64_t *knn_indices, std::vector &y, template void knn_regress(ValType *out, const int64_t *knn_indices, - const std::vector &y, - size_t n_labels, size_t n_rows, int k, - cudaStream_t user_stream, cudaStream_t *int_streams = nullptr, - int n_int_streams = 0) { + const std::vector &y, size_t n_labels, + size_t n_rows, int k, cudaStream_t user_stream, + cudaStream_t *int_streams = nullptr, int n_int_streams = 0) { dim3 grid(MLCommon::ceildiv(n_rows, (size_t)TPB_X), 1, 1); dim3 blk(TPB_X, 1, 1); @@ -542,8 +542,8 @@ void knn_regress(ValType *out, const int64_t *knn_indices, for (int i = 0; i < y.size(); i++) { cudaStream_t stream = select_stream(user_stream, int_streams, n_int_streams, i); - 
regress_avg_kernel<<>>(out, knn_indices, y[i], n_labels, n_rows, - k, y.size(), i); + regress_avg_kernel<<>>( + out, knn_indices, y[i], n_labels, n_rows, k, y.size(), i); CUDA_CHECK(cudaPeekAtLastError()); } } From 40c8edef921b45d350dc57676738088cf7d88ccb Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Thu, 16 Apr 2020 09:05:44 -0400 Subject: [PATCH 221/330] Updating doxygen --- cpp/include/cuml/neighbors/knn.hpp | 2 ++ cpp/src_prims/selection/knn.h | 6 +++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/cpp/include/cuml/neighbors/knn.hpp b/cpp/include/cuml/neighbors/knn.hpp index d6cb2d6599..d5b18706c2 100644 --- a/cpp/include/cuml/neighbors/knn.hpp +++ b/cpp/include/cuml/neighbors/knn.hpp @@ -52,6 +52,7 @@ void brute_force_knn(cumlHandle &handle, std::vector &input, * @param out output array on device (size n_samples * size of y vector) * @param knn_indices index array on device resulting from knn query (size n_samples * k) * @param y vector of label arrays on device vector size is number of (size n_samples) + * @param n_labels number of vertices in index (eg. size of each y array) * @param n_samples number of samples in knn_indices * @param k number of nearest neighbors in knn_indices */ @@ -69,6 +70,7 @@ void knn_classify(cumlHandle &handle, int *out, int64_t *knn_indices, * @param out output array on device (size n_samples) * @param knn_indices array on device of knn indices (size n_samples * k) * @param y array of labels on device (size n_samples) + * @param n_labels number of vertices in index (eg. 
size of each y array) * @param n_samples number of samples in knn_indices and out * @param k number of nearest neighbors in knn_indices */ diff --git a/cpp/src_prims/selection/knn.h b/cpp/src_prims/selection/knn.h index 89605a95f3..ae547c9329 100644 --- a/cpp/src_prims/selection/knn.h +++ b/cpp/src_prims/selection/knn.h @@ -384,7 +384,7 @@ __global__ void regress_avg_kernel(LabelType *out, const int64_t *knn_indices, * each output in the vector is a different array of labels * corresponding to the i'th output. * @param n_rows number of rows in knn_indices - * @param n_labels number of vertices in index + * @param n_labels number of vertices in index (eg. size of each y array) * @param k number of neighbors in knn_indices * @param uniq_labels vector of the sorted unique labels for each array in y * @param n_unique vector of sizes for each array in uniq_labels @@ -441,7 +441,7 @@ void class_probs(std::vector &out, const int64_t *knn_indices, * @param y vector of label arrays. for multilabel classification, each * element in the vector is a different "output" array of labels corresponding * to the i'th output. - * @param n_labels number of vertices in index + * @param n_labels number of vertices in index (eg. size of each y array) * @param n_rows number of rows in knn_indices * * @param k number of neighbors in knn_indices * @param uniq_labels vector of the sorted unique labels for each array in y @@ -519,7 +519,7 @@ void knn_classify(int *out, const int64_t *knn_indices, std::vector &y, * @param y vector of label arrays. for multilabel classification, each * element in the vector is a different "output" array of labels corresponding * to the i'th output. - * @param n_labels number of vertices in index + * @param n_labels number of vertices in index (eg. 
size of each y array) * @param n_rows number of rows in knn_indices * @param k number of neighbors in knn_indices * @param user_stream main stream to use for queuing isolated CUDA events From a49f4f67bbdba2ce7e33c21c448d49b155e457c8 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Thu, 16 Apr 2020 09:47:35 -0400 Subject: [PATCH 222/330] Updating tests to use train and test --- cpp/src_prims/selection/knn.h | 1 - .../cuml/test/test_kneighbors_classifier.py | 86 +++++++++++++------ 2 files changed, 60 insertions(+), 27 deletions(-) diff --git a/cpp/src_prims/selection/knn.h b/cpp/src_prims/selection/knn.h index ae547c9329..dfb6775420 100644 --- a/cpp/src_prims/selection/knn.h +++ b/cpp/src_prims/selection/knn.h @@ -483,7 +483,6 @@ void knn_classify(int *out, const int64_t *knn_indices, std::vector &y, * work to the streams, we don't need to explicitly synchronize the streams here. */ - std::cout << "n_rows classify: " << n_rows << std::endl; class_probs(probs, knn_indices, y, n_labels, n_rows, k, uniq_labels, n_unique, allocator, user_stream, int_streams, n_int_streams); diff --git a/python/cuml/test/test_kneighbors_classifier.py b/python/cuml/test/test_kneighbors_classifier.py index eedf8be115..85e071727e 100644 --- a/python/cuml/test/test_kneighbors_classifier.py +++ b/python/cuml/test/test_kneighbors_classifier.py @@ -31,6 +31,30 @@ import pandas as pd +def _build_train_test_data(X, y, datatype, train_ratio=0.9): + + train_selection = np.random.RandomState(42).choice( + [True, False], X.shape[0], replace=True, + p=[train_ratio, 1.0-train_ratio]) + + X_train = X[train_selection] + y_train = y[train_selection] + X_test = X[~train_selection] + y_test = y[~train_selection] + + if datatype == "dataframe": + X_train = cudf.DataFrame.from_gpu_matrix( + rmm.to_device(X_train)) + y_train = cudf.DataFrame.from_gpu_matrix( + rmm.to_device(y_train.reshape(y_train.shape[0], 1))) + X_test = cudf.DataFrame.from_gpu_matrix( + rmm.to_device(X_test)) + y_test = 
cudf.DataFrame.from_gpu_matrix( + rmm.to_device(y_test.reshape(y_test.shape[0], 1))) + + return X_train, X_test, y_train, y_test + + @pytest.mark.parametrize("datatype", ["dataframe", "numpy"]) @pytest.mark.parametrize("nrows", [1000, 10000]) @pytest.mark.parametrize("ncols", [50, 100]) @@ -47,21 +71,19 @@ def test_neighborhood_predictions(nrows, ncols, n_neighbors, X = X.astype(np.float32) - if datatype == "dataframe": - X = cudf.DataFrame.from_gpu_matrix(rmm.to_device(X)) - y = cudf.DataFrame.from_gpu_matrix(rmm.to_device(y.reshape(nrows, 1))) + X_train, X_test, y_train, y_test = _build_train_test_data(X, y, datatype) knn_cu = cuKNN(n_neighbors=n_neighbors) - knn_cu.fit(X, y) + knn_cu.fit(X_train, y_train) - predictions = knn_cu.predict(X) + predictions = knn_cu.predict(X_test) if datatype == "dataframe": assert isinstance(predictions, cudf.DataFrame) else: assert isinstance(predictions, np.ndarray) - assert array_equal(predictions.astype(np.int32), y.astype(np.int32)) + assert array_equal(predictions.astype(np.int32), y_test.astype(np.int32)) @pytest.mark.parametrize("datatype", ["dataframe", "numpy"]) @@ -76,15 +98,12 @@ def test_score(nrows, ncols, n_neighbors, n_clusters, datatype): cluster_std=0.01) X = X.astype(np.float32) - - if datatype == "dataframe": - X = cudf.DataFrame.from_gpu_matrix(rmm.to_device(X)) - y = cudf.DataFrame.from_gpu_matrix(rmm.to_device(y.reshape(nrows, 1))) + X_train, X_test, y_train, y_test = _build_train_test_data(X, y, datatype) knn_cu = cuKNN(n_neighbors=n_neighbors) - knn_cu.fit(X, y) + knn_cu.fit(X_train, y_train) - assert knn_cu.score(X, y) >= (1.0 - 0.004) + assert knn_cu.score(X_test, y_test) >= (1.0 - 0.004) @pytest.mark.parametrize("datatype", ["dataframe", "numpy"]) @@ -102,25 +121,23 @@ def test_predict_proba(nrows, ncols, n_neighbors, n_clusters, datatype): X = X.astype(np.float32) - if datatype == "dataframe": - X = cudf.DataFrame.from_gpu_matrix(rmm.to_device(X)) - y = 
cudf.DataFrame.from_gpu_matrix(rmm.to_device(y.reshape(nrows, 1))) + X_train, X_test, y_train, y_test = _build_train_test_data(X, y, datatype) knn_cu = cuKNN(n_neighbors=n_neighbors) - knn_cu.fit(X, y) + knn_cu.fit(X_train, y_train) - predictions = knn_cu.predict_proba(X) + predictions = knn_cu.predict_proba(X_test) if datatype == "dataframe": assert isinstance(predictions, cudf.DataFrame) predictions = predictions.as_gpu_matrix().copy_to_host() - y = y.as_gpu_matrix().copy_to_host().reshape(nrows) + y_test = y_test.as_gpu_matrix().copy_to_host().reshape(nrows) else: assert isinstance(predictions, np.ndarray) y_hat = np.argmax(predictions, axis=1) - assert array_equal(y_hat.astype(np.int32), y.astype(np.int32)) + assert array_equal(y_hat.astype(np.int32), y_test.astype(np.int32)) assert array_equal(predictions.sum(axis=1), np.ones(nrows)) @@ -157,18 +174,35 @@ def test_predict_non_gaussian(n_samples, n_features, n_neighbors, n_query): np.asarray(cuml_result.as_gpu_matrix())[:, 0], sk_result) -def test_nonmonotonic_labels(): +@pytest.mark.parametrize("n_classes", [2, 5]) +@pytest.mark.parametrize("n_rows", [50, 100]) +@pytest.mark.parametrize("n_cols", [25, 50]) +@pytest.mark.parametrize("n_neighbors", [3, 5]) +@pytest.mark.parametrize("datatype", ["numpy", "dataframe"]) +def test_nonmonotonic_labels(n_classes, n_rows, n_cols, + datatype, n_neighbors): - X = np.array([[0, 0, 1], [1, 0, 1]]).astype(np.float32) + X, y = make_blobs(n_samples=n_rows, + centers=n_classes, + n_features=n_cols, + cluster_std=0.01, + random_state=0) + + X = X.astype(np.float32) - y = np.array([15, 5]).astype(np.int32) + # Draw labels from non-monotonically increasing set + classes = np.arange(0, n_classes*5, 5) + for i in range(n_classes): + y[y == i] = classes[i] - knn_cu = cuKNN(n_neighbors=1) - knn_cu.fit(X, y) + X_train, _, y_train, _ = _build_train_test_data(X, y, datatype) - p = knn_cu.predict(X) + knn_cu = cuKNN(n_neighbors=n_neighbors) + knn_cu.fit(X_train, y_train) - assert 
array_equal(p.astype(np.int32), y) + p = knn_cu.predict(X_train) + + assert array_equal(p.astype(np.int32), y_train) @pytest.mark.parametrize("datatype", ["dataframe", "numpy"]) From 6a5209a57e40b462fa9f3d66403719ade73e3b19 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Thu, 16 Apr 2020 10:00:47 -0400 Subject: [PATCH 223/330] Removing unecessary character in doxygen --- cpp/src_prims/selection/knn.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src_prims/selection/knn.h b/cpp/src_prims/selection/knn.h index dfb6775420..ad2b7e26d6 100644 --- a/cpp/src_prims/selection/knn.h +++ b/cpp/src_prims/selection/knn.h @@ -443,7 +443,7 @@ void class_probs(std::vector &out, const int64_t *knn_indices, * to the i'th output. * @param n_labels number of vertices in index (eg. size of each y array) * @param n_rows number of rows in knn_indices - * * @param k number of neighbors in knn_indices + * @param k number of neighbors in knn_indices * @param uniq_labels vector of the sorted unique labels for each array in y * @param n_unique vector of sizes for each array in uniq_labels * @param allocator device allocator to use for temporary workspace From 89eddb8f0131bd376996bb90b1b98ead8f05eaaa Mon Sep 17 00:00:00 2001 From: "Corey J. 
Nolet" Date: Thu, 16 Apr 2020 11:20:45 -0400 Subject: [PATCH 224/330] Adjusting thresholds for umap mnmg tests --- cpp/src/umap/init_embed/spectral_algo.h | 8 +-- python/cuml/test/dask/test_umap.py | 96 ++++++++++++++++--------- 2 files changed, 68 insertions(+), 36 deletions(-) diff --git a/cpp/src/umap/init_embed/spectral_algo.h b/cpp/src/umap/init_embed/spectral_algo.h index 2d18bc4eb2..2c4b3774bc 100644 --- a/cpp/src/umap/init_embed/spectral_algo.h +++ b/cpp/src/umap/init_embed/spectral_algo.h @@ -57,14 +57,14 @@ void launcher(const cumlHandle &handle, const T *X, int n, int d, coo->nnz, n, params->n_components, tmp_storage.data()); - MLCommon::LinAlg::unaryOp( - tmp_storage.data(), tmp_storage.data(), n * params->n_components, - [=] __device__(T input) { return fabsf(input); }, stream); - MLCommon::LinAlg::transpose(tmp_storage.data(), embedding, n, params->n_components, handle.getImpl().getCublasHandle(), stream); + MLCommon::LinAlg::unaryOp( + tmp_storage.data(), tmp_storage.data(), n * params->n_components, + [=] __device__(T input) { return fabsf(input); }, stream); + thrust::device_ptr d_ptr = thrust::device_pointer_cast(tmp_storage.data()); T max = *(thrust::max_element(thrust::cuda::par.on(stream), d_ptr, d_ptr + (n * params->n_components))); diff --git a/python/cuml/test/dask/test_umap.py b/python/cuml/test/dask/test_umap.py index 4e3d591f0a..127a5d4a96 100644 --- a/python/cuml/test/dask/test_umap.py +++ b/python/cuml/test/dask/test_umap.py @@ -18,16 +18,55 @@ import cupy as cp import numpy as np -from sklearn.manifold.t_sne import trustworthiness -from sklearn.datasets import make_blobs +from cuml.metrics import trustworthiness +from cuml.datasets import make_blobs + +import math + +from sklearn.datasets import load_digits +from sklearn.datasets import load_iris + + +def _load_dataset(dataset, n_rows): + + if dataset == "make_blobs": + local_X, local_y = make_blobs(n_samples=n_rows, n_features=10, + centers=200, cluster_std=0.8, + random_state=42) + + 
local_X = cp.asarray(local_X) + local_y = cp.asarray(local_y) + + else: + if dataset == "digits": + local_X, local_y = load_digits(return_X_y=True) + + else: # dataset == "iris" + local_X, local_y = load_iris(return_X_y=True) + + local_X = cp.asarray(local_X) + local_y = cp.asarray(local_y) + + local_X = local_X.repeat( + math.ceil(n_rows / len(local_X)), axis=0) + local_y = local_y.repeat( + math.ceil(n_rows / len(local_y)), axis=0) + + # Add some gaussian noise + local_X += cp.random.standard_normal(local_X.shape, + dtype=cp.float32) + + return local_X, local_y @pytest.mark.mg -@pytest.mark.parametrize("n_parts", [2, 5]) -@pytest.mark.parametrize("sampling_ratio", [0.1, 0.2, 0.4]) +@pytest.mark.parametrize("n_parts", [2, 5, 10]) +@pytest.mark.parametrize("n_rows", [10000, 50000]) +@pytest.mark.parametrize("sampling_ratio", [0.1, 0.2, 0.4, 0.5]) @pytest.mark.parametrize("supervised", [True, False]) -@pytest.mark.parametrize("dataset", ["make_blobs", "digits", "iris"]) -def test_umap_mnmg(n_parts, sampling_ratio, supervised, dataset, cluster): +@pytest.mark.parametrize("dataset", ["digits", "iris"]) +def test_umap_mnmg(n_parts, n_rows, sampling_ratio, supervised, + dataset, cluster): client = Client(cluster) @@ -38,17 +77,7 @@ def test_umap_mnmg(n_parts, sampling_ratio, supervised, dataset, cluster): n_neighbors = 10 - if dataset == "make_blobs": - local_X, local_y = make_blobs(n_samples=10000, n_features=10, - centers=200, cluster_std=0.8, - shuffle=True, random_state=42) - else: - if dataset == "digits": - from sklearn.datasets import load_digits - local_X, local_y = load_digits(return_X_y=True) - else: # dataset == "iris" - from sklearn.datasets import load_iris - local_X, local_y = load_iris(return_X_y=True) + local_X, local_y = _load_dataset(dataset, n_rows) def umap_mnmg_trustworthiness(): n_samples = local_X.shape[0] @@ -74,15 +103,17 @@ def umap_mnmg_trustworthiness(): distributed_model = MNMG_UMAP(local_model) embedding = 
distributed_model.transform(X_transform_d) - embedding = cp.asnumpy(embedding.compute()) - return trustworthiness(X_transform, embedding, n_neighbors) + embedding = embedding.compute() + return trustworthiness(X_transform, embedding, + n_neighbors=n_neighbors) def local_umap_trustworthiness(): local_model = UMAP(n_neighbors=n_neighbors, random_state=42) local_model.fit(local_X, local_y) embedding = local_model.transform(local_X) - return trustworthiness(local_X, embedding, n_neighbors) + return trustworthiness(local_X, embedding, + n_neighbors=n_neighbors) loc_umap = local_umap_trustworthiness() dist_umap = umap_mnmg_trustworthiness() @@ -90,18 +121,19 @@ def local_umap_trustworthiness(): print("\nLocal UMAP trustworthiness score : {:.2f}".format(loc_umap)) print("UMAP MNMG trustworthiness score : {:.2f}".format(dist_umap)) - if dataset == "make_blobs": - assert loc_umap > 0.98 - if sampling_ratio <= 0.1: - assert dist_umap > 0.74 - else: - assert dist_umap > 0.9 - elif dataset == "digits": - assert loc_umap > 0.88 - assert dist_umap > 0.8 - else: # dataset == "iris" - assert loc_umap > 0.88 - assert dist_umap > 0.78 + trust_diff = loc_umap - dist_umap + + if sampling_ratio == 0.1: + assert trust_diff <= 0.4 + elif sampling_ratio == 0.2: + assert trust_diff <= 0.3 + elif sampling_ratio == 0.4: + assert trust_diff <= 0.2 + elif sampling_ratio == 0.5: + assert trust_diff <= 0.11 + else: + raise ValueError("No assertion for sampling ratio. " + "Please update.") finally: client.close() From 04ee1583b9d165ea49e09bb8417fdb0de6fa1162 Mon Sep 17 00:00:00 2001 From: "Corey J. 
Nolet" Date: Thu, 16 Apr 2020 13:11:20 -0400 Subject: [PATCH 225/330] Fixing style in test prims --- cpp/test/prims/knn_classify.cu | 4 ++-- cpp/test/prims/knn_regression.cu | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/cpp/test/prims/knn_classify.cu b/cpp/test/prims/knn_classify.cu index 756f0840e8..cfb8bd3af1 100644 --- a/cpp/test/prims/knn_classify.cu +++ b/cpp/test/prims/knn_classify.cu @@ -78,8 +78,8 @@ class KNNClassifyTest : public ::testing::TestWithParam { std::vector n_unique; n_unique.push_back(n_classes); - knn_classify(pred_labels, knn_indices, y, params.rows, params.rows, params.k, - uniq_labels, n_unique, alloc, stream); + knn_classify(pred_labels, knn_indices, y, params.rows, params.rows, + params.k, uniq_labels, n_unique, alloc, stream); CUDA_CHECK(cudaStreamSynchronize(stream)); CUDA_CHECK(cudaStreamDestroy(stream)); diff --git a/cpp/test/prims/knn_regression.cu b/cpp/test/prims/knn_regression.cu index 507d1c05ca..080cdfb6fa 100644 --- a/cpp/test/prims/knn_regression.cu +++ b/cpp/test/prims/knn_regression.cu @@ -103,7 +103,8 @@ class KNNRegressionTest : public ::testing::TestWithParam { std::vector y; y.push_back(train_labels); - knn_regress(pred_labels, knn_indices, y, params.rows, params.rows, params.k, stream); + knn_regress(pred_labels, knn_indices, y, params.rows, params.rows, params.k, + stream); CUDA_CHECK(cudaStreamSynchronize(stream)); CUDA_CHECK(cudaStreamDestroy(stream)); From 9c5d19b02a8aec2e0a3483dc386a1e547f748e03 Mon Sep 17 00:00:00 2001 From: "Corey J. 
Nolet" Date: Thu, 16 Apr 2020 13:28:14 -0400 Subject: [PATCH 226/330] Bumping fais to v1.6.2 --- cpp/cmake/Dependencies.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/cmake/Dependencies.cmake b/cpp/cmake/Dependencies.cmake index 6ce4b856ae..a42617b23f 100644 --- a/cpp/cmake/Dependencies.cmake +++ b/cpp/cmake/Dependencies.cmake @@ -61,7 +61,7 @@ set(FAISS_DIR ${CMAKE_CURRENT_BINARY_DIR}/faiss CACHE STRING "Path to FAISS source directory") ExternalProject_Add(faiss GIT_REPOSITORY https://github.com/facebookresearch/faiss.git - GIT_TAG v1.6.1 + GIT_TAG v1.6.2 CONFIGURE_COMMAND LIBS=-pthread CPPFLAGS=-w LDFLAGS=-L${CMAKE_INSTALL_PREFIX}/lib From b1d7c16132b06fb12cd157c7a831057deb88016a Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Thu, 16 Apr 2020 13:48:40 -0400 Subject: [PATCH 227/330] Updating trustworthiness test w/ threshold. --- python/cuml/metrics/trustworthiness.pyx | 8 +++----- python/cuml/test/dask/test_umap.py | 10 ++++++---- python/cuml/test/test_trustworthiness.py | 4 ++-- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/python/cuml/metrics/trustworthiness.pyx b/python/cuml/metrics/trustworthiness.pyx index 6fe27ac300..44fdc00123 100644 --- a/python/cuml/metrics/trustworthiness.pyx +++ b/python/cuml/metrics/trustworthiness.pyx @@ -26,6 +26,7 @@ import warnings from numba import cuda from libc.stdint cimport uintptr_t +import cuml.common.handle from cuml.common.handle cimport cumlHandle from cuml.utils import get_cudf_column_ptr, get_dev_array_ptr, \ input_to_dev_array @@ -102,11 +103,8 @@ def trustworthiness(X, X_embedded, handle=None, n_neighbors=5, convert_to_dtype=(np.float32 if convert_dtype else None)) - cdef cumlHandle* handle_ = 0 - if handle is None: - handle_ = (new cumlHandle()) - else: - handle_ = handle.getHandle() + handle = cuml.common.handle.Handle() if handle is None else handle + cdef cumlHandle* handle_ = handle.getHandle() if metric == 'euclidean': res = trustworthiness_score[float, 
euclidean](handle_[0], diff --git a/python/cuml/test/dask/test_umap.py b/python/cuml/test/dask/test_umap.py index 4dd7dc2b57..9884c1ad84 100644 --- a/python/cuml/test/dask/test_umap.py +++ b/python/cuml/test/dask/test_umap.py @@ -75,7 +75,8 @@ def _local_umap_trustworthiness(local_X, local_y, local_model.fit(local_X, y=y_train) embedding = local_model.transform(local_X) return trustworthiness(local_X, embedding, - n_neighbors=n_neighbors) + n_neighbors=n_neighbors, + batch_size=5000) def _umap_mnmg_trustworthiness(local_X, local_y, @@ -96,13 +97,13 @@ def _umap_mnmg_trustworthiness(local_X, local_y, random_state=42) n_samples = local_X.shape[0] - n_samples_per_part = int(n_samples / n_parts) + n_samples_per_part = math.ceil(n_samples / n_parts) selection = np.random.RandomState(42).choice( [True, False], n_samples, replace=True, p=[sampling_ratio, 1.0 - sampling_ratio]) X_train = local_X[selection] - X_transform = local_X[~selection] + X_transform = local_X X_transform_d = da.from_array(X_transform, chunks=(n_samples_per_part, -1)) @@ -117,7 +118,8 @@ def _umap_mnmg_trustworthiness(local_X, local_y, embedding = embedding.compute() return trustworthiness(X_transform, embedding, - n_neighbors=n_neighbors) + n_neighbors=n_neighbors, + batch_size=5000) @pytest.mark.mg diff --git a/python/cuml/test/test_trustworthiness.py b/python/cuml/test/test_trustworthiness.py index 177109f964..866fd9bfdb 100644 --- a/python/cuml/test/test_trustworthiness.py +++ b/python/cuml/test/test_trustworthiness.py @@ -25,7 +25,7 @@ @pytest.mark.parametrize('input_type', ['ndarray', 'dataframe']) -@pytest.mark.parametrize('n_samples', [10, 100, 1000]) +@pytest.mark.parametrize('n_samples', [10, 100, 500]) @pytest.mark.parametrize('batch_size', [512, 2]) @pytest.mark.parametrize('n_features', [10, 100]) @pytest.mark.parametrize('n_components', [2, 8]) @@ -51,4 +51,4 @@ def test_trustworthiness(input_type, n_samples, n_features, n_components, score = cuml_trustworthiness(X, X_embedded, 
batch_size=batch_size) - assert score == sk_score + assert abs(score - sk_score) <= 1e-3 From a5156e72fd18518ce30f7b5d3c271fdf50c6bac4 Mon Sep 17 00:00:00 2001 From: Levs Dolgovs Date: Thu, 16 Apr 2020 13:37:27 -0700 Subject: [PATCH 228/330] updated tests to match new logic --- cpp/test/sg/fil_test.cu | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/cpp/test/sg/fil_test.cu b/cpp/test/sg/fil_test.cu index 5d540d0ae1..939de13dfd 100644 --- a/cpp/test/sg/fil_test.cu +++ b/cpp/test/sg/fil_test.cu @@ -259,12 +259,7 @@ class BaseFilTest : public testing::TestWithParam { pred += infer_one_tree(&nodes[j * num_nodes], &data_h[i * ps.num_cols]).f; } - if ((ps.output & fil::output_t::CLASS) != 0) { - transform(pred, want_proba_h[i * 2 + 1], want_preds_h[i]); - } else { - transform(pred, want_proba_h[i * 2 + 1], want_preds_h[i * 2 + 1]); - complement(&(want_preds_h[i * 2])); - } + transform(pred, want_proba_h[i * 2 + 1], want_preds_h[i]); complement(&(want_proba_h[i * 2])); } break; @@ -286,17 +281,10 @@ class BaseFilTest : public testing::TestWithParam { most_votes = pred; best_class = c; } - if ((ps.output & fil::output_t::CLASS) != 0) { - float _; - transform(pred, want_proba_h[r * ps.num_classes + c], _); - } else { - transform(pred, want_proba_h[r * ps.num_classes + c], - want_preds_h[r * ps.num_classes + c]); - } - } - if ((ps.output & fil::output_t::CLASS) != 0) { - want_preds_h[r] = best_class; + float thresholded_proba; // not used; do argmax instead + transform(pred, want_proba_h[r * ps.num_classes + c], thresholded_proba); } + want_preds_h[r] = best_class; } break; } From 07d348717bfb44f41699952475f7846673e39941 Mon Sep 17 00:00:00 2001 From: Levs Dolgovs Date: Thu, 16 Apr 2020 13:51:10 -0700 Subject: [PATCH 229/330] fixed style --- cpp/src/fil/fil.cu | 3 ++- cpp/test/sg/fil_test.cu | 14 ++++++-------- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/cpp/src/fil/fil.cu b/cpp/src/fil/fil.cu index 
575329eb09..e9d3f417f5 100644 --- a/cpp/src/fil/fil.cu +++ b/cpp/src/fil/fil.cu @@ -554,7 +554,8 @@ size_t tl_leaf_vector_size(const tl::Model& model) { const tl::Tree& tree = model.trees[0]; int node_key; for (node_key = tree_root(tree); !tl_node_at(tree, node_key).is_leaf(); - node_key = tl_node_at(tree, node_key).cright()); + node_key = tl_node_at(tree, node_key).cright()) + ; const tl::Tree::Node& node = tl_node_at(tree, node_key); if (node.has_leaf_vector()) return node.leaf_vector().size(); return 0; diff --git a/cpp/test/sg/fil_test.cu b/cpp/test/sg/fil_test.cu index 939de13dfd..c6854821ac 100644 --- a/cpp/test/sg/fil_test.cu +++ b/cpp/test/sg/fil_test.cu @@ -276,15 +276,13 @@ class BaseFilTest : public testing::TestWithParam { int best_class = 0; float most_votes = 0.0; for (int c = 0; c < ps.num_classes; ++c) { - float pred = class_votes[c]; - if (pred > most_votes) { - most_votes = pred; - best_class = c; - } - float thresholded_proba; // not used; do argmax instead - transform(pred, want_proba_h[r * ps.num_classes + c], thresholded_proba); + float thresholded_proba; // not used; do argmax instead + transform(class_votes[c], want_proba_h[r * ps.num_classes + c], + thresholded_proba); } - want_preds_h[r] = best_class; + want_preds_h[r] = + std::max_element(class_votes.begin(), class_votes.end()) - + class_votes.begin(); } break; } From 5db8ca274cb4d3e1a7308c2e85e26c1ce25466d9 Mon Sep 17 00:00:00 2001 From: "Corey J. 
Nolet" Date: Thu, 16 Apr 2020 18:44:17 -0400 Subject: [PATCH 230/330] Fixing kneighbors classifier tests to use proper test size --- python/cuml/test/test_kneighbors_classifier.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/cuml/test/test_kneighbors_classifier.py b/python/cuml/test/test_kneighbors_classifier.py index 85e071727e..ff98684846 100644 --- a/python/cuml/test/test_kneighbors_classifier.py +++ b/python/cuml/test/test_kneighbors_classifier.py @@ -131,14 +131,14 @@ def test_predict_proba(nrows, ncols, n_neighbors, n_clusters, datatype): if datatype == "dataframe": assert isinstance(predictions, cudf.DataFrame) predictions = predictions.as_gpu_matrix().copy_to_host() - y_test = y_test.as_gpu_matrix().copy_to_host().reshape(nrows) + y_test = y_test.as_gpu_matrix().copy_to_host().reshape(y_test.shape[0]) else: assert isinstance(predictions, np.ndarray) y_hat = np.argmax(predictions, axis=1) assert array_equal(y_hat.astype(np.int32), y_test.astype(np.int32)) - assert array_equal(predictions.sum(axis=1), np.ones(nrows)) + assert array_equal(predictions.sum(axis=1), np.ones(y_test.shape[0])) @pytest.mark.parametrize("n_samples", [100]) From eb1e3b10095a909ddec57fa7f4aad51f5d934921 Mon Sep 17 00:00:00 2001 From: divyegala Date: Fri, 17 Apr 2020 02:23:21 -0500 Subject: [PATCH 231/330] low rank covariance --- python/cuml/dask/datasets/regression.py | 187 +++++++++++++++--------- 1 file changed, 119 insertions(+), 68 deletions(-) diff --git a/python/cuml/dask/datasets/regression.py b/python/cuml/dask/datasets/regression.py index cc2121207c..bb059686a3 100644 --- a/python/cuml/dask/datasets/regression.py +++ b/python/cuml/dask/datasets/regression.py @@ -19,7 +19,6 @@ from dask.distributed import default_client import numpy as np import cupy as cp -from cuml.utils import rmm_cupy_ary from cuml.dask.common.part_utils import _extract_partitions from cuml.datasets.regression import make_regression as sg_make_regression from cuml.utils 
import with_cupy_rmm @@ -71,6 +70,36 @@ def f_order_standard_normal(client, rs, chunksizes, ncols, dtype): return da.concatenate(chunks_dela, axis=0) +def _data_from_multivariate_normal(seed, covar, n_samples, n_features, dtype): + mean = cp.zeros(n_features) + local_rs = cp.random.RandomState() + return local_rs.multivariate_normal(mean, covar, n_samples, + dtype=dtype) + +def data_from_multivariate_normal(client, rs, covar, chunksizes, n_features, + dtype): + workers = list(client.has_what().keys()) + + n_chunks = len(chunksizes) + chunks_workers = (workers * n_chunks)[:n_chunks] + + chunk_seeds = rs.permutation(len(chunksizes)) + covar = covar.compute() + + data_parts = [client.submit(_data_from_multivariate_normal, + chunk_seeds[idx], covar, + chunksizes[idx], n_features, + dtype, workers=[chunks_workers[idx]], + pure=False) + for idx, chunk in enumerate(chunksizes)] + + data_dela = [da.from_delayed(dask.delayed(chunk), + shape=(chunksizes[idx], n_features), + meta=cp.zeros((1)), dtype=dtype) + for idx, chunk in enumerate(data_parts)] + return da.concatenate(data_dela, axis=0) + + def get_X(t): return t[0] @@ -90,12 +119,12 @@ def _f_order_shuffle(X, y, n_samples, seed, features_indices): return X, y -def f_order_shuffle(client, rs, X, y, n_parts, chunksizes, - n_features, features_indices, n_targets, dtype): +def f_order_shuffle(client, rs, X, y, chunksizes, n_features, + features_indices, n_targets, dtype): X_parts = client.sync(_extract_partitions, X) y_parts = client.sync(_extract_partitions, y) - chunk_seeds = rs.permutation(n_parts) + chunk_seeds = rs.permutation(len(chunksizes)) shuffled = [client.submit(_f_order_shuffle, X_part, y_parts[idx][1], chunksizes[idx], @@ -137,10 +166,62 @@ def convert_C_to_F_order(client, X, chunksizes, n_features, dtype): return da.concatenate(X_dela, axis=0) -def make_low_rank_matrix(client=None, n_samples=100, n_features=100, +def generate_chunks_for_qr(total_size, min_size, n_parts): + + n_total_per_part = max(1, 
int(total_size / n_parts)) + if n_total_per_part > min_size: + min_size = n_total_per_part + + n_partitions = int(max(1, total_size / min_size)) + rest = total_size % (n_partitions * min_size) + chunks_list = [min_size for i in range(n_partitions-1)] + chunks_list.append(min_size + rest) + return tuple(chunks_list) + + +def generate_singular_values(n, effective_rank, tail_strength, + n_samples_per_part): + # Index of the singular values + sing_ind = cp.arange(n, dtype=cp.float64) + + # Build the singular profile by assembling signal and noise components + tmp = sing_ind / effective_rank + low_rank = (1 - tail_strength) * cp.exp(-1.0 * tmp ** 2) + tail = tail_strength * cp.exp(-0.1 * tmp) + local_s = low_rank + tail + s = da.from_array(local_s, + chunks=(int(n_samples_per_part),)) + return s + + +def make_low_rank_covariance(n_features, effective_rank, + tail_strength, random_state, n_parts, + n_samples_per_part, dtype): + + rs = create_rs_generator(random_state) + + m2 = rs.standard_normal((n_features, n_features), + chunks=(-1, generate_chunks_for_qr(n_features, + n_features, + n_parts)), + dtype=dtype) + v, _ = da.linalg.qr(m2) + + if n_samples_per_part is None: + n_samples_per_part = max(1, int(n_samples / n_parts)) + v = v.rechunk({0: n_samples_per_part, 1: -1}) + + s = generate_singular_values(n_features, effective_rank, tail_strength, + n_samples_per_part) + + v *= (s ** 2) + return da.dot(v, da.transpose(v)) + + +def make_low_rank_matrix(n_samples=100, n_features=100, effective_rank=10, tail_strength=0.5, random_state=None, n_parts=1, - n_samples_per_part=None, dtype='float32', order='F'): + n_samples_per_part=None, dtype='float32'): """ Generate a mostly low rank matrix with bell-shaped singular values Parameters @@ -170,23 +251,9 @@ def make_low_rank_matrix(client=None, n_samples=100, n_features=100, The matrix. 
""" - client = default_client() if client is None else client - rs = create_rs_generator(random_state) n = min(n_samples, n_features) - def generate_chunks_for_qr(total_size, min_size, n_parts): - - n_total_per_part = max(1, int(total_size / n_parts)) - if n_total_per_part > min_size: - min_size = n_total_per_part - - n_partitions = int(max(1, total_size / min_size)) - rest = total_size % (n_partitions * min_size) - chunks_list = [min_size for i in range(n_partitions-1)] - chunks_list.append(min_size + rest) - return tuple(chunks_list) - # Random (ortho normal) vectors m1 = rs.standard_normal((n_samples, n), chunks=(generate_chunks_for_qr(n_samples, @@ -206,16 +273,8 @@ def generate_chunks_for_qr(total_size, min_size, n_parts): u = u.rechunk({0: n_samples_per_part, 1: -1}) v = v.rechunk({0: n_samples_per_part, 1: -1}) - # Index of the singular values - sing_ind = rmm_cupy_ary(cp.arange, n, dtype=cp.float64) - - # Build the singular profile by assembling signal and noise components - tmp = sing_ind / effective_rank - low_rank = ((1 - tail_strength) * rmm_cupy_ary(cp.exp, -1.0 * tmp ** 2)) - tail = tail_strength * rmm_cupy_ary(cp.exp, -0.1 * tmp) - local_s = low_rank + tail - s = da.from_array(local_s, - chunks=(int(n_samples_per_part),)) + s = generate_singular_values(n, effective_rank, tail_strength, + n_samples_per_part) u *= s return da.dot(u, v) @@ -324,7 +383,7 @@ def make_regression(n_samples=100, n_features=100, n_informative=10, data_chunksizes = tuple(data_chunksizes) - if (effective_rank is None) or (effective_rank and not use_full_low_rank): + if effective_rank is None: # Randomly generate a well conditioned input set if order == 'F': X = f_order_standard_normal(client, rs, data_chunksizes, @@ -337,17 +396,26 @@ def make_regression(n_samples=100, n_features=100, n_informative=10, else: # Randomly generate a low rank, fat tail input set - X = make_low_rank_matrix(client=client, - n_samples=n_samples, - n_features=n_features, - 
effective_rank=effective_rank, - tail_strength=tail_strength, - random_state=rs, - n_parts=n_parts, - dtype=dtype, - order=order) - X = X.rechunk({0: data_chunksizes, - 1: -1}) + if use_full_low_rank: + X = make_low_rank_matrix(n_samples=n_samples, + n_features=n_features, + effective_rank=effective_rank, + tail_strength=tail_strength, + random_state=rs, + n_parts=n_parts, + n_samples_per_part=n_samples_per_part, + dtype=dtype) + + X = X.rechunk({0: data_chunksizes, + 1: -1}) + else: + covar = make_low_rank_covariance(n_features, effective_rank, + tail_strength, rs, n_parts, + n_samples_per_part, dtype) + X = data_from_multivariate_normal(client, rs, covar, + data_chunksizes, n_features, + dtype) + if order == 'F': X = convert_C_to_F_order(client, X, data_chunksizes, n_features, dtype) @@ -355,36 +423,19 @@ def make_regression(n_samples=100, n_features=100, n_informative=10, # Generate a ground truth model with only n_informative features being non # zeros (the other features are not correlated to y and should be ignored # by a sparsifying regularizers such as L1 or elastic net) - if effective_rank and not use_full_low_rank: - _, _, coef_ = sg_make_regression(n_samples=n_samples_per_part, - n_features=n_features, - n_informative=n_informative, - n_targets=n_targets, - bias=bias, - effective_rank=effective_rank, - tail_strength=tail_strength, - noise=noise, - shuffle=shuffle, - coef=True, - random_state=random_state, - dtype='double') - coef_ = cp.array(coef_, dtype=dtype, order=order) - ground_truth = da.from_array(coef_, chunks=(n_samples_per_part, -1)) - y = da.dot(X, ground_truth) + bias - else: - ground_truth = 100.0 * rs.standard_normal((n_informative, n_targets), - chunks=(n_samples_per_part, - -1), - dtype=dtype) + ground_truth = 100.0 * rs.standard_normal((n_informative, n_targets), + chunks=(n_samples_per_part, + -1), + dtype=dtype) - y = da.dot(X[:, :n_informative], ground_truth) + bias + y = da.dot(X[:, :n_informative], ground_truth) + bias - if 
n_informative != n_features and (effective_rank is None - or use_full_low_rank): + if n_informative != n_features: zeroes = 0.0 * rs.standard_normal((n_features - n_informative, n_targets), dtype=dtype) ground_truth = da.concatenate([ground_truth, zeroes], axis=0) + ground_truth = ground_truth.rechunk(-1) # Add noise @@ -395,8 +446,7 @@ def make_regression(n_samples=100, n_features=100, n_informative=10, if shuffle: features_indices = np.random.permutation(n_features) if order == 'F': - X, y = f_order_shuffle(client, rs, X, y, n_parts, - data_chunksizes, + X, y = f_order_shuffle(client, rs, X, y, data_chunksizes, n_features, features_indices, n_targets, dtype) @@ -410,6 +460,7 @@ def make_regression(n_samples=100, n_features=100, n_informative=10, X = X.rechunk((data_chunksizes, -1)) y = y.rechunk((data_chunksizes, -1)) + ground_truth = ground_truth[features_indices, :] y = da.squeeze(y) From 88f050feed54395fe4fa077c8f4d7848f44006cd Mon Sep 17 00:00:00 2001 From: Vinay D Date: Fri, 17 Apr 2020 15:20:34 +0530 Subject: [PATCH 232/330] Fixing an error introduced in commit d38b4f970 --- cpp/src/decisiontree/levelalgo/levelfunc_classifier.cuh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/decisiontree/levelalgo/levelfunc_classifier.cuh b/cpp/src/decisiontree/levelalgo/levelfunc_classifier.cuh index 0afc55e2ed..27634ae4dc 100644 --- a/cpp/src/decisiontree/levelalgo/levelfunc_classifier.cuh +++ b/cpp/src/decisiontree/levelalgo/levelfunc_classifier.cuh @@ -144,7 +144,7 @@ void grow_deep_tree_classification( CUDA_CHECK(cudaStreamSynchronize(tempmem->stream)); leaf_eval_classification( infogain, depth, tree_params.min_impurity_decrease, tree_params.max_depth, - n_unique_labels, tree_params.max_depth, h_new_node_flags, sparsetree, + n_unique_labels, tree_params.max_leaves, h_new_node_flags, sparsetree, sparsesize, h_parent_hist, n_nodes_nextitr, sparse_nodelist, leaf_cnt); MLCommon::updateDevice(d_new_node_flags, h_new_node_flags, n_nodes, From 
803c3f7839b3d60bc022f9348441d93f3013a9ea Mon Sep 17 00:00:00 2001 From: Thejaswi Rao Date: Fri, 17 Apr 2020 08:37:40 -0700 Subject: [PATCH 233/330] FIX syntax error with previous memcheck nightly command --- ci/gpu/build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index a72f1da435..5412762c54 100644 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -142,7 +142,7 @@ GTEST_OUTPUT="xml:${WORKSPACE}/test-results/prims/" ./test/prims # TEST - Run GoogleTest for ml-prims, but with cuda-memcheck enabled ################################################################################ -if [ "$BUILD_MODE" = "branch" && "$BUILD_TYPE" = "gpu" ]; then +if [ "$BUILD_MODE" = "branch" ] && [ "$BUILD_TYPE" = "gpu" ]; then cd $WORKSPACE/cpp/build python ../scripts/cuda-memcheck.py -tool memcheck -exe ./test/prims fi From d65f2e6791bd5616693d4b435a97826d0684b422 Mon Sep 17 00:00:00 2001 From: Thejaswi Rao Date: Fri, 17 Apr 2020 08:39:51 -0700 Subject: [PATCH 234/330] DOC update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 47cdc4b5dd..c36327cb8c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -45,6 +45,7 @@ - PR #2078: Ignore negative cache indices in get_vecs - PR #2084: Fixed cuda-memcheck errors with COO unit-tests - PR #2087: Fixed cuda-memcheck errors with dispersion prim +- PR #2096: Fixed syntax error with nightly build command for memcheck unit-tests # cuML 0.13.0 (Date TBD) From 5a86d234e8045ea6910ded334f2c43b4ebc82834 Mon Sep 17 00:00:00 2001 From: Thejaswi Rao Date: Fri, 17 Apr 2020 08:46:59 -0700 Subject: [PATCH 235/330] DOC added a logger print for the nightly build command for debugging purposes --- ci/gpu/build.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index 5412762c54..317c5c55ed 100644 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -143,6 +143,7 @@ GTEST_OUTPUT="xml:${WORKSPACE}/test-results/prims/" 
./test/prims ################################################################################ if [ "$BUILD_MODE" = "branch" ] && [ "$BUILD_TYPE" = "gpu" ]; then + logger "GoogleTest for ml-prims with cuda-memcheck enabled..." cd $WORKSPACE/cpp/build python ../scripts/cuda-memcheck.py -tool memcheck -exe ./test/prims fi From a5d7d31e5906860408cc06bd7998f1b1e1942246 Mon Sep 17 00:00:00 2001 From: chaithyagr Date: Fri, 17 Apr 2020 18:51:26 +0200 Subject: [PATCH 236/330] Refactor headers for decision_tree, glm, pca --- cpp/src/decisiontree/decisiontree_impl.h | 2 +- cpp/src/decisiontree/levelalgo/metric.cuh | 2 +- .../levelalgo/{metric_def.h => metric_def.cuh} | 0 cpp/src/glm/glm.cu | 6 +++--- cpp/src/glm/{ols.h => ols.cuh} | 2 +- cpp/src/glm/{preprocess.h => preprocess.cuh} | 0 cpp/src/glm/qn/{glm_base.h => glm_base.cuh} | 2 +- cpp/src/glm/qn/{glm_linear.h => glm_linear.cuh} | 4 ++-- cpp/src/glm/qn/{glm_logistic.h => glm_logistic.cuh} | 4 ++-- .../qn/{glm_regularizer.h => glm_regularizer.cuh} | 2 +- cpp/src/glm/qn/{glm_softmax.h => glm_softmax.cuh} | 4 ++-- cpp/src/glm/qn/{qn.h => qn.cuh} | 12 ++++++------ .../glm/qn/{qn_linesearch.h => qn_linesearch.cuh} | 2 +- cpp/src/glm/qn/{qn_solvers.h => qn_solvers.cuh} | 6 +++--- cpp/src/glm/qn/{qn_util.h => qn_util.cuh} | 0 cpp/src/glm/qn/{simple_mat.h => simple_mat.cuh} | 0 cpp/src/glm/{ridge.h => ridge.cuh} | 2 +- cpp/src/pca/pca.cu | 2 +- cpp/src/pca/{pca.h => pca.cuh} | 0 cpp/src/solver/cd.cuh | 2 +- cpp/src/solver/sgd.cuh | 2 +- cpp/test/sg/ols.cu | 2 +- cpp/test/sg/pca_test.cu | 2 +- cpp/test/sg/quasi_newton.cu | 8 ++++---- cpp/test/sg/ridge.cu | 2 +- 25 files changed, 35 insertions(+), 35 deletions(-) rename cpp/src/decisiontree/levelalgo/{metric_def.h => metric_def.cuh} (100%) rename cpp/src/glm/{ols.h => ols.cuh} (99%) rename cpp/src/glm/{preprocess.h => preprocess.cuh} (100%) rename cpp/src/glm/qn/{glm_base.h => glm_base.cuh} (99%) rename cpp/src/glm/qn/{glm_linear.h => glm_linear.cuh} (94%) rename 
cpp/src/glm/qn/{glm_logistic.h => glm_logistic.cuh} (95%) rename cpp/src/glm/qn/{glm_regularizer.h => glm_regularizer.cuh} (98%) rename cpp/src/glm/qn/{glm_softmax.h => glm_softmax.cuh} (98%) rename cpp/src/glm/qn/{qn.h => qn.cuh} (96%) rename cpp/src/glm/qn/{qn_linesearch.h => qn_linesearch.cuh} (99%) rename cpp/src/glm/qn/{qn_solvers.h => qn_solvers.cuh} (99%) rename cpp/src/glm/qn/{qn_util.h => qn_util.cuh} (100%) rename cpp/src/glm/qn/{simple_mat.h => simple_mat.cuh} (100%) rename cpp/src/glm/{ridge.h => ridge.cuh} (99%) rename cpp/src/pca/{pca.h => pca.cuh} (100%) diff --git a/cpp/src/decisiontree/decisiontree_impl.h b/cpp/src/decisiontree/decisiontree_impl.h index 2c30028a1e..0e65c6224d 100644 --- a/cpp/src/decisiontree/decisiontree_impl.h +++ b/cpp/src/decisiontree/decisiontree_impl.h @@ -25,7 +25,7 @@ #include #include #include -#include "levelalgo/metric_def.h" +#include "memory.h" /** check for treelite runtime API errors and assert accordingly */ #define TREELITE_CHECK(call) \ diff --git a/cpp/src/decisiontree/levelalgo/metric.cuh b/cpp/src/decisiontree/levelalgo/metric.cuh index 13b0cef2a0..152bec9d27 100644 --- a/cpp/src/decisiontree/levelalgo/metric.cuh +++ b/cpp/src/decisiontree/levelalgo/metric.cuh @@ -16,7 +16,7 @@ #pragma once #include "cuda_utils.h" -#include "metric_def.h" +#include "metric_def.cuh" template DI T SquareFunctor::exec(T x) { diff --git a/cpp/src/decisiontree/levelalgo/metric_def.h b/cpp/src/decisiontree/levelalgo/metric_def.cuh similarity index 100% rename from cpp/src/decisiontree/levelalgo/metric_def.h rename to cpp/src/decisiontree/levelalgo/metric_def.cuh diff --git a/cpp/src/glm/glm.cu b/cpp/src/glm/glm.cu index 9c51a10dfd..663739c31b 100644 --- a/cpp/src/glm/glm.cu +++ b/cpp/src/glm/glm.cu @@ -15,9 +15,9 @@ */ #include #include -#include "glm/qn/qn.h" -#include "ols.h" -#include "ridge.h" +#include "glm/qn/qn.cuh" +#include "ols.cuh" +#include "ridge.cuh" namespace ML { namespace GLM { diff --git a/cpp/src/glm/ols.h 
b/cpp/src/glm/ols.cuh similarity index 99% rename from cpp/src/glm/ols.h rename to cpp/src/glm/ols.cuh index 73005eefdf..6a929b0f3e 100644 --- a/cpp/src/glm/ols.h +++ b/cpp/src/glm/ols.cuh @@ -30,7 +30,7 @@ #include "common/cumlHandle.hpp" #include "common/device_buffer.hpp" #include "ml_utils.h" -#include "preprocess.h" +#include "preprocess.cuh" namespace ML { namespace GLM { diff --git a/cpp/src/glm/preprocess.h b/cpp/src/glm/preprocess.cuh similarity index 100% rename from cpp/src/glm/preprocess.h rename to cpp/src/glm/preprocess.cuh diff --git a/cpp/src/glm/qn/glm_base.h b/cpp/src/glm/qn/glm_base.cuh similarity index 99% rename from cpp/src/glm/qn/glm_base.h rename to cpp/src/glm/qn/glm_base.cuh index ee3089819b..09076513d1 100644 --- a/cpp/src/glm/qn/glm_base.h +++ b/cpp/src/glm/qn/glm_base.cuh @@ -16,7 +16,7 @@ #pragma once -#include +#include #include #include #include "cuda_utils.h" diff --git a/cpp/src/glm/qn/glm_linear.h b/cpp/src/glm/qn/glm_linear.cuh similarity index 94% rename from cpp/src/glm/qn/glm_linear.h rename to cpp/src/glm/qn/glm_linear.cuh index 7f9722ef3d..35acbb3bc5 100644 --- a/cpp/src/glm/qn/glm_linear.h +++ b/cpp/src/glm/qn/glm_linear.cuh @@ -16,9 +16,9 @@ #pragma once -#include +#include #include "cuda_utils.h" -#include "glm/qn/glm_base.h" +#include "glm/qn/glm_base.cuh" #include "linalg/binary_op.h" namespace ML { diff --git a/cpp/src/glm/qn/glm_logistic.h b/cpp/src/glm/qn/glm_logistic.cuh similarity index 95% rename from cpp/src/glm/qn/glm_logistic.h rename to cpp/src/glm/qn/glm_logistic.cuh index dda7976b86..dfb78f0874 100644 --- a/cpp/src/glm/qn/glm_logistic.h +++ b/cpp/src/glm/qn/glm_logistic.cuh @@ -16,9 +16,9 @@ #pragma once -#include +#include #include "cuda_utils.h" -#include "glm/qn/glm_base.h" +#include "glm/qn/glm_base.cuh" #include "linalg/binary_op.h" namespace ML { diff --git a/cpp/src/glm/qn/glm_regularizer.h b/cpp/src/glm/qn/glm_regularizer.cuh similarity index 98% rename from cpp/src/glm/qn/glm_regularizer.h rename to 
cpp/src/glm/qn/glm_regularizer.cuh index fa6fb0eee3..4209b0d3fe 100644 --- a/cpp/src/glm/qn/glm_regularizer.h +++ b/cpp/src/glm/qn/glm_regularizer.cuh @@ -16,7 +16,7 @@ #pragma once -#include +#include #include "cuda_utils.h" #include "linalg/binary_op.h" #include "linalg/map_then_reduce.h" diff --git a/cpp/src/glm/qn/glm_softmax.h b/cpp/src/glm/qn/glm_softmax.cuh similarity index 98% rename from cpp/src/glm/qn/glm_softmax.h rename to cpp/src/glm/qn/glm_softmax.cuh index 0d41b0b2c9..dfae1bfaba 100644 --- a/cpp/src/glm/qn/glm_softmax.h +++ b/cpp/src/glm/qn/glm_softmax.cuh @@ -16,9 +16,9 @@ #pragma once -#include +#include #include "cuda_utils.h" -#include "glm/qn/glm_base.h" +#include "glm/qn/glm_base.cuh" #include "linalg/binary_op.h" namespace ML { diff --git a/cpp/src/glm/qn/qn.h b/cpp/src/glm/qn/qn.cuh similarity index 96% rename from cpp/src/glm/qn/qn.h rename to cpp/src/glm/qn/qn.cuh index 303b6fd639..c44306d047 100644 --- a/cpp/src/glm/qn/qn.h +++ b/cpp/src/glm/qn/qn.cuh @@ -15,12 +15,12 @@ */ #pragma once -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include #include #include diff --git a/cpp/src/glm/qn/qn_linesearch.h b/cpp/src/glm/qn/qn_linesearch.cuh similarity index 99% rename from cpp/src/glm/qn/qn_linesearch.h rename to cpp/src/glm/qn/qn_linesearch.cuh index 7ed28aa74c..469f5b9a93 100644 --- a/cpp/src/glm/qn/qn_linesearch.h +++ b/cpp/src/glm/qn/qn_linesearch.cuh @@ -15,7 +15,7 @@ */ #pragma once -#include +#include /* * Linesearch functions diff --git a/cpp/src/glm/qn/qn_solvers.h b/cpp/src/glm/qn/qn_solvers.cuh similarity index 99% rename from cpp/src/glm/qn/qn_solvers.h rename to cpp/src/glm/qn/qn_solvers.cuh index b2f4eeb5be..4328b22ade 100644 --- a/cpp/src/glm/qn/qn_solvers.h +++ b/cpp/src/glm/qn/qn_solvers.cuh @@ -41,9 +41,9 @@ */ #include -#include -#include -#include +#include +#include +#include #include namespace ML { diff --git a/cpp/src/glm/qn/qn_util.h 
b/cpp/src/glm/qn/qn_util.cuh similarity index 100% rename from cpp/src/glm/qn/qn_util.h rename to cpp/src/glm/qn/qn_util.cuh diff --git a/cpp/src/glm/qn/simple_mat.h b/cpp/src/glm/qn/simple_mat.cuh similarity index 100% rename from cpp/src/glm/qn/simple_mat.h rename to cpp/src/glm/qn/simple_mat.cuh diff --git a/cpp/src/glm/ridge.h b/cpp/src/glm/ridge.cuh similarity index 99% rename from cpp/src/glm/ridge.h rename to cpp/src/glm/ridge.cuh index 008f1ae2d7..5f5c7466db 100644 --- a/cpp/src/glm/ridge.h +++ b/cpp/src/glm/ridge.cuh @@ -29,7 +29,7 @@ #include #include "common/cumlHandle.hpp" #include "ml_utils.h" -#include "preprocess.h" +#include "preprocess.cuh" namespace ML { namespace GLM { diff --git a/cpp/src/pca/pca.cu b/cpp/src/pca/pca.cu index fb05393f8b..f5146d8834 100644 --- a/cpp/src/pca/pca.cu +++ b/cpp/src/pca/pca.cu @@ -15,7 +15,7 @@ */ #include -#include "pca.h" +#include "pca.cuh" namespace ML { diff --git a/cpp/src/pca/pca.h b/cpp/src/pca/pca.cuh similarity index 100% rename from cpp/src/pca/pca.h rename to cpp/src/pca/pca.cuh diff --git a/cpp/src/solver/cd.cuh b/cpp/src/solver/cd.cuh index 0518e8c760..936bd86563 100644 --- a/cpp/src/solver/cd.cuh +++ b/cpp/src/solver/cd.cuh @@ -30,7 +30,7 @@ #include #include #include "common/cumlHandle.hpp" -#include "glm/preprocess.h" +#include "glm/preprocess.cuh" #include "ml_utils.h" #include "shuffle.h" diff --git a/cpp/src/solver/sgd.cuh b/cpp/src/solver/sgd.cuh index d3cc57cd38..d66fdb8ec6 100644 --- a/cpp/src/solver/sgd.cuh +++ b/cpp/src/solver/sgd.cuh @@ -32,7 +32,7 @@ #include #include #include "common/cumlHandle.hpp" -#include "glm/preprocess.h" +#include "glm/preprocess.cuh" #include "learning_rate.h" #include "ml_utils.h" #include "shuffle.h" diff --git a/cpp/test/sg/ols.cu b/cpp/test/sg/ols.cu index 191d2062f1..c641fcfca9 100644 --- a/cpp/test/sg/ols.cu +++ b/cpp/test/sg/ols.cu @@ -18,7 +18,7 @@ #include #include #include -#include "glm/ols.h" +#include "glm/ols.cuh" #include "ml_utils.h" namespace ML { 
diff --git a/cpp/test/sg/pca_test.cu b/cpp/test/sg/pca_test.cu index 9647a58579..9bcc9c8a02 100644 --- a/cpp/test/sg/pca_test.cu +++ b/cpp/test/sg/pca_test.cu @@ -19,7 +19,7 @@ #include #include #include "ml_utils.h" -#include "pca/pca.h" +#include "pca/pca.cuh" #include "random/rng.h" #include "test_utils.h" diff --git a/cpp/test/sg/quasi_newton.cu b/cpp/test/sg/quasi_newton.cu index 970d82f89c..c15e41f431 100644 --- a/cpp/test/sg/quasi_newton.cu +++ b/cpp/test/sg/quasi_newton.cu @@ -1,7 +1,7 @@ -#include -#include -#include -#include +#include +#include +#include +#include #include #include #include diff --git a/cpp/test/sg/ridge.cu b/cpp/test/sg/ridge.cu index 14c8370bac..dd531000ee 100644 --- a/cpp/test/sg/ridge.cu +++ b/cpp/test/sg/ridge.cu @@ -17,7 +17,7 @@ #include #include #include -#include "glm/ridge.h" +#include "glm/ridge.cuh" #include "ml_utils.h" namespace ML { From e2a58868af49de9ae4b3837e90f5058635d8971b Mon Sep 17 00:00:00 2001 From: divyegala Date: Fri, 17 Apr 2020 12:10:03 -0500 Subject: [PATCH 237/330] some mbsgd classifier test change for CI --- python/cuml/test/test_mbsgd_classifier.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/python/cuml/test/test_mbsgd_classifier.py b/python/cuml/test/test_mbsgd_classifier.py index d5fde1bbfd..1c5c9894f0 100644 --- a/python/cuml/test/test_mbsgd_classifier.py +++ b/python/cuml/test/test_mbsgd_classifier.py @@ -43,6 +43,9 @@ def make_dataset(request): X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8, random_state=10) + y_train = y_train.astype(datatype) + y_test = y_test.astype(datatype) + return nrows, X_train, X_test, y_train, y_test From 7f0d64b89d245aa68c36e7736475cb3830932a31 Mon Sep 17 00:00:00 2001 From: Vinay D Date: Sat, 18 Apr 2020 15:30:54 +0530 Subject: [PATCH 238/330] Reverting all, recent refactoring changes temporarily, to work on CI issue --- cpp/include/cuml/tree/decisiontree.hpp | 2 +- cpp/src/decisiontree/decisiontree_impl.cuh | 87 +++++++++++-------- 
cpp/src/decisiontree/decisiontree_impl.h | 25 ++++-- .../levelalgo/levelfunc_classifier.cuh | 75 ++++++++-------- .../levelalgo/levelfunc_regressor.cuh | 74 ++++++++-------- cpp/src/decisiontree/memory.cuh | 41 +++++---- cpp/src/decisiontree/memory.h | 23 +++-- cpp/src/randomforest/randomforest_impl.cuh | 16 +++- 8 files changed, 193 insertions(+), 150 deletions(-) diff --git a/cpp/include/cuml/tree/decisiontree.hpp b/cpp/include/cuml/tree/decisiontree.hpp index d72a3e2040..a675061037 100644 --- a/cpp/include/cuml/tree/decisiontree.hpp +++ b/cpp/include/cuml/tree/decisiontree.hpp @@ -68,7 +68,7 @@ struct DecisionTreeParams { /** * Minimum impurity decrease required for spliting a node. If the impurity decrease is below this value, node is leafed out. Default is 0.0 */ - float min_impurity_decrease = 0.0f; + float min_impurity_decrease; }; /** diff --git a/cpp/src/decisiontree/decisiontree_impl.cuh b/cpp/src/decisiontree/decisiontree_impl.cuh index b99ee555ab..c816201a85 100644 --- a/cpp/src/decisiontree/decisiontree_impl.cuh +++ b/cpp/src/decisiontree/decisiontree_impl.cuh @@ -203,28 +203,43 @@ void DecisionTreeBase::print( * @param[in] n_sampled_rows: Number of rows after subsampling * @param[in] unique_labels: Number of unique classes for calssification. Its set to 1 for regression * @param[in] treeid: Tree id in case of building multiple tree from RF. + * @param[in] n_bins: Number of split bins for every node. + * @param[in] split_algo_flag: Split algo used. MinMax / Quantile + * @param[in] cfg_min_rows_per_rows: Minimum number of rows to consider before split evaluation + * @param[in] cfg_bootstrap_features: If features need to be bootstarpped. + * @param[in] cfg_split_criterion: Split criteria to be used. GINI, ENTROPY, MSE, MAE + * @param[in] quantile_per_tree: If per tree quantile needs to be built. 
*/ template void DecisionTreeBase::plant( std::vector> &sparsetree, const T *data, const int ncols, const int nrows, const L *labels, unsigned int *rowids, - const int n_sampled_rows, int unique_labels, const int treeid) { + const int n_sampled_rows, int unique_labels, const int treeid, int maxdepth, + int max_leaf_nodes, const float colper, int n_bins, int split_algo_flag, + int cfg_min_rows_per_node, bool cfg_bootstrap_features, + CRITERION cfg_split_criterion, bool quantile_per_tree) { + split_algo = split_algo_flag; dinfo.NLocalrows = nrows; dinfo.NGlobalrows = nrows; dinfo.Ncols = ncols; + nbins = n_bins; + treedepth = maxdepth; + maxleaves = max_leaf_nodes; n_unique_labels = unique_labels; + min_rows_per_node = cfg_min_rows_per_node; + bootstrap_features = cfg_bootstrap_features; + split_criterion = cfg_split_criterion; - if (tree_params.split_algo == SPLIT_ALGO::GLOBAL_QUANTILE && - tree_params.quantile_per_tree) { + if (split_algo == SPLIT_ALGO::GLOBAL_QUANTILE && quantile_per_tree) { preprocess_quantile(data, rowids, n_sampled_rows, ncols, dinfo.NLocalrows, - tree_params.n_bins, tempmem); + n_bins, tempmem); } CUDA_CHECK(cudaStreamSynchronize( tempmem->stream)); // added to ensure accurate measurement //Bootstrap features unsigned int *h_colids = tempmem->h_colids->data(); - if (tree_params.bootstrap_features) { + if (bootstrap_features) { srand(treeid * 1000); for (int i = 0; i < dinfo.Ncols; i++) { h_colids[i] = rand() % dinfo.Ncols; @@ -236,12 +251,10 @@ void DecisionTreeBase::plant( total_temp_mem = tempmem->totalmem; MLCommon::TimerCPU timer; - grow_deep_tree(data, labels, rowids, n_sampled_rows, ncols, - tree_params.max_features, dinfo.NLocalrows, sparsetree, treeid, - tempmem); + grow_deep_tree(data, labels, rowids, n_sampled_rows, ncols, colper, + dinfo.NLocalrows, sparsetree, treeid, tempmem); train_time = timer.getElapsedSeconds(); } - template void DecisionTreeBase::predict(const ML::cumlHandle &handle, const TreeMetaDataNode *tree, @@ -310,7 
+323,7 @@ void DecisionTreeBase::base_fit( const cudaStream_t stream_in, const T *data, const int ncols, const int nrows, const L *labels, unsigned int *rowids, const int n_sampled_rows, int unique_labels, std::vector> &sparsetree, - const int treeid, bool is_classifier, + const int treeid, DecisionTreeParams &tree_params, bool is_classifier, std::shared_ptr> in_tempmem) { prepare_fit_timer.reset(); const char *CRITERION_NAME[] = {"GINI", "ENTROPY", "MSE", "MAE", "END"}; @@ -342,12 +355,17 @@ void DecisionTreeBase::base_fit( } else { tempmem = std::make_shared>( device_allocator_in, host_allocator_in, stream_in, nrows, ncols, - unique_labels, tree_params); + tree_params.max_features, unique_labels, tree_params.n_bins, + tree_params.split_algo, tree_params.max_depth, + tree_params.shuffle_features); tree_params.quantile_per_tree = true; } plant(sparsetree, data, ncols, nrows, labels, rowids, n_sampled_rows, - unique_labels, treeid); + unique_labels, treeid, tree_params.max_depth, tree_params.max_leaves, + tree_params.max_features, tree_params.n_bins, tree_params.split_algo, + tree_params.min_rows_per_node, tree_params.bootstrap_features, + tree_params.split_criterion, tree_params.quantile_per_tree); if (in_tempmem == nullptr) { tempmem.reset(); } @@ -358,14 +376,13 @@ void DecisionTreeClassifier::fit( const ML::cumlHandle &handle, const T *data, const int ncols, const int nrows, const int *labels, unsigned int *rowids, const int n_sampled_rows, const int unique_labels, TreeMetaDataNode *&tree, - DecisionTreeParams tree_parameters, + DecisionTreeParams tree_params, std::shared_ptr> in_tempmem) { - this->tree_params = tree_parameters; this->base_fit(handle.getImpl().getDeviceAllocator(), handle.getImpl().getHostAllocator(), handle.getImpl().getStream(), data, ncols, nrows, labels, rowids, n_sampled_rows, unique_labels, tree->sparsetree, - tree->treeid, true, in_tempmem); + tree->treeid, tree_params, true, in_tempmem); this->set_metadata(tree); } @@ -377,12 +394,11 @@ 
void DecisionTreeClassifier::fit( const cudaStream_t stream_in, const T *data, const int ncols, const int nrows, const int *labels, unsigned int *rowids, const int n_sampled_rows, const int unique_labels, TreeMetaDataNode *&tree, - DecisionTreeParams tree_parameters, + DecisionTreeParams tree_params, std::shared_ptr> in_tempmem) { - this->tree_params = tree_parameters; this->base_fit(device_allocator_in, host_allocator_in, stream_in, data, ncols, nrows, labels, rowids, n_sampled_rows, unique_labels, - tree->sparsetree, tree->treeid, true, in_tempmem); + tree->sparsetree, tree->treeid, tree_params, true, in_tempmem); this->set_metadata(tree); } @@ -390,13 +406,13 @@ template void DecisionTreeRegressor::fit( const ML::cumlHandle &handle, const T *data, const int ncols, const int nrows, const T *labels, unsigned int *rowids, const int n_sampled_rows, - TreeMetaDataNode *&tree, DecisionTreeParams tree_parameters, + TreeMetaDataNode *&tree, DecisionTreeParams tree_params, std::shared_ptr> in_tempmem) { - this->tree_params = tree_parameters; - this->base_fit( - handle.getImpl().getDeviceAllocator(), handle.getImpl().getHostAllocator(), - handle.getImpl().getStream(), data, ncols, nrows, labels, rowids, - n_sampled_rows, 1, tree->sparsetree, tree->treeid, false, in_tempmem); + this->base_fit(handle.getImpl().getDeviceAllocator(), + handle.getImpl().getHostAllocator(), + handle.getImpl().getStream(), data, ncols, nrows, labels, + rowids, n_sampled_rows, 1, tree->sparsetree, tree->treeid, + tree_params, false, in_tempmem); this->set_metadata(tree); } @@ -406,12 +422,11 @@ void DecisionTreeRegressor::fit( const std::shared_ptr host_allocator_in, const cudaStream_t stream_in, const T *data, const int ncols, const int nrows, const T *labels, unsigned int *rowids, const int n_sampled_rows, - TreeMetaDataNode *&tree, DecisionTreeParams tree_parameters, + TreeMetaDataNode *&tree, DecisionTreeParams tree_params, std::shared_ptr> in_tempmem) { - this->tree_params = tree_parameters; 
this->base_fit(device_allocator_in, host_allocator_in, stream_in, data, ncols, nrows, labels, rowids, n_sampled_rows, 1, tree->sparsetree, - tree->treeid, false, in_tempmem); + tree->treeid, tree_params, false, in_tempmem); this->set_metadata(tree); } @@ -423,10 +438,12 @@ void DecisionTreeClassifier::grow_deep_tree( const int treeid, std::shared_ptr> tempmem) { int leaf_cnt = 0; int depth_cnt = 0; - grow_deep_tree_classification(data, labels, rowids, ncols, colper, - n_sampled_rows, nrows, this->n_unique_labels, - this->tree_params, depth_cnt, leaf_cnt, - sparsetree, treeid, tempmem); + grow_deep_tree_classification( + data, labels, rowids, ncols, colper, n_sampled_rows, nrows, + this->n_unique_labels, this->nbins, this->treedepth, this->maxleaves, + this->min_rows_per_node, this->split_criterion, this->split_algo, + this->min_impurity_decrease, depth_cnt, leaf_cnt, sparsetree, treeid, + tempmem); this->depth_counter = depth_cnt; this->leaf_counter = leaf_cnt; } @@ -439,9 +456,11 @@ void DecisionTreeRegressor::grow_deep_tree( const int treeid, std::shared_ptr> tempmem) { int leaf_cnt = 0; int depth_cnt = 0; - grow_deep_tree_regression(data, labels, rowids, ncols, colper, n_sampled_rows, - nrows, this->tree_params, depth_cnt, leaf_cnt, - sparsetree, treeid, tempmem); + grow_deep_tree_regression( + data, labels, rowids, ncols, colper, n_sampled_rows, nrows, this->nbins, + this->treedepth, this->maxleaves, this->min_rows_per_node, + this->split_criterion, this->split_algo, this->min_impurity_decrease, + depth_cnt, leaf_cnt, sparsetree, treeid, tempmem); this->depth_counter = depth_cnt; this->leaf_counter = leaf_cnt; } diff --git a/cpp/src/decisiontree/decisiontree_impl.h b/cpp/src/decisiontree/decisiontree_impl.h index b0ec67c798..2c30028a1e 100644 --- a/cpp/src/decisiontree/decisiontree_impl.h +++ b/cpp/src/decisiontree/decisiontree_impl.h @@ -63,8 +63,12 @@ struct DataInfo { template class DecisionTreeBase { protected: + int split_algo; + int nbins; DataInfo dinfo; 
+ int treedepth; int depth_counter = 0; + int maxleaves; int leaf_counter = 0; std::shared_ptr> tempmem; size_t total_temp_mem; @@ -74,13 +78,19 @@ class DecisionTreeBase { int n_unique_labels = -1; // number of unique labels in dataset double prepare_time = 0; double train_time = 0; + int min_rows_per_node; + bool bootstrap_features; + CRITERION split_criterion; MLCommon::TimerCPU prepare_fit_timer; - DecisionTreeParams tree_params; + float min_impurity_decrease = 0.0; void plant(std::vector> &sparsetree, const T *data, const int ncols, const int nrows, const L *labels, unsigned int *rowids, const int n_sampled_rows, int unique_labels, - const int treeid); + const int treeid, int maxdepth, int max_leaf_nodes, + const float colper, int n_bins, int split_algo_flag, + int cfg_min_rows_per_node, bool cfg_bootstrap_features, + CRITERION cfg_split_criterion, bool cfg_quantile_per_tree); virtual void grow_deep_tree( const T *data, const L *labels, unsigned int *rowids, @@ -95,7 +105,8 @@ class DecisionTreeBase { const int nrows, const L *labels, unsigned int *rowids, const int n_sampled_rows, int unique_labels, std::vector> &sparsetree, const int treeid, - bool is_classifier, std::shared_ptr> in_tempmem); + DecisionTreeParams &tree_params, bool is_classifier, + std::shared_ptr> in_tempmem); public: // Printing utility for high level tree info. 
@@ -127,7 +138,7 @@ class DecisionTreeClassifier : public DecisionTreeBase { void fit(const ML::cumlHandle &handle, const T *data, const int ncols, const int nrows, const int *labels, unsigned int *rowids, const int n_sampled_rows, const int unique_labels, - TreeMetaDataNode *&tree, DecisionTreeParams tree_parameters, + TreeMetaDataNode *&tree, DecisionTreeParams tree_params, std::shared_ptr> in_tempmem = nullptr); //This fit fucntion does not take handle , used by RF @@ -136,7 +147,7 @@ class DecisionTreeClassifier : public DecisionTreeBase { const cudaStream_t stream_in, const T *data, const int ncols, const int nrows, const int *labels, unsigned int *rowids, const int n_sampled_rows, const int unique_labels, - TreeMetaDataNode *&tree, DecisionTreeParams tree_parameters, + TreeMetaDataNode *&tree, DecisionTreeParams tree_params, std::shared_ptr> in_tempmem); private: @@ -155,7 +166,7 @@ class DecisionTreeRegressor : public DecisionTreeBase { void fit(const ML::cumlHandle &handle, const T *data, const int ncols, const int nrows, const T *labels, unsigned int *rowids, const int n_sampled_rows, TreeMetaDataNode *&tree, - DecisionTreeParams tree_parameters, + DecisionTreeParams tree_params, std::shared_ptr> in_tempmem = nullptr); //This fit function does not take handle. 
Used by RF @@ -164,7 +175,7 @@ class DecisionTreeRegressor : public DecisionTreeBase { const cudaStream_t stream_in, const T *data, const int ncols, const int nrows, const T *labels, unsigned int *rowids, const int n_sampled_rows, TreeMetaDataNode *&tree, - DecisionTreeParams tree_parameters, + DecisionTreeParams tree_params, std::shared_ptr> in_tempmem); private: diff --git a/cpp/src/decisiontree/levelalgo/levelfunc_classifier.cuh b/cpp/src/decisiontree/levelalgo/levelfunc_classifier.cuh index b13ecf4bfe..6e9249706b 100644 --- a/cpp/src/decisiontree/levelalgo/levelfunc_classifier.cuh +++ b/cpp/src/decisiontree/levelalgo/levelfunc_classifier.cuh @@ -36,10 +36,12 @@ template void grow_deep_tree_classification( const T* data, const int* labels, unsigned int* rowids, const int Ncols, const float colper, int n_sampled_rows, const int nrows, - const int n_unique_labels, - const ML::DecisionTree::DecisionTreeParams& tree_params, int& depth_cnt, - int& leaf_cnt, std::vector>& sparsetree, - const int treeid, std::shared_ptr> tempmem) { + const int n_unique_labels, const int nbins, const int maxdepth, + const int maxleaves, const int min_rows_per_node, + const ML::CRITERION split_cr, const int split_algo, + const float min_impurity_decrease, int& depth_cnt, int& leaf_cnt, + std::vector>& sparsetree, const int treeid, + std::shared_ptr> tempmem) { const int ncols_sampled = (int)(colper * Ncols); unsigned int* flagsptr = tempmem->d_flags->data(); unsigned int* sample_cnt = tempmem->d_sample_cnt->data(); @@ -47,7 +49,7 @@ void grow_deep_tree_classification( tempmem->stream); std::vector histvec(n_unique_labels, 0); T initial_metric; - if (tree_params.split_criterion == ML::CRITERION::GINI) { + if (split_cr == ML::CRITERION::GINI) { initial_metric_classification(labels, sample_cnt, nrows, n_unique_labels, histvec, initial_metric, tempmem); @@ -56,7 +58,7 @@ void grow_deep_tree_classification( n_unique_labels, histvec, initial_metric, tempmem); } - int reserve_depth = 
std::min(tempmem->swap_depth, tree_params.max_depth); + int reserve_depth = std::min(tempmem->swap_depth, maxdepth); size_t total_nodes = pow(2, (reserve_depth + 1)) - 1; unsigned int* h_parent_hist = tempmem->h_parent_hist->data(); @@ -104,7 +106,7 @@ void grow_deep_tree_classification( } std::vector feature_selector(h_colids, h_colids + Ncols); - int scatter_algo_depth = std::min(tempmem->swap_depth, tree_params.max_depth); + int scatter_algo_depth = std::min(tempmem->swap_depth, maxdepth); for (int depth = 0; (depth < scatter_algo_depth) && (n_nodes_nextitr != 0); depth++) { depth_cnt = depth + 1; @@ -120,40 +122,37 @@ void grow_deep_tree_classification( ncols_sampled, n_nodes, mtg, dist, feature_selector, tempmem, d_rng); get_histogram_classification(data, labels, flagsptr, sample_cnt, nrows, - Ncols, ncols_sampled, n_unique_labels, - tree_params.n_bins, n_nodes, - tree_params.split_algo, tempmem, d_histogram); + Ncols, ncols_sampled, n_unique_labels, nbins, + n_nodes, split_algo, tempmem, d_histogram); float* infogain = tempmem->h_outgain->data(); - if (tree_params.split_criterion == ML::CRITERION::GINI) { + if (split_cr == ML::CRITERION::GINI) { get_best_split_classification( h_histogram, d_histogram, h_colids, d_colids, h_colstart, d_colstart, - Ncols, ncols_sampled, tree_params.n_bins, n_unique_labels, n_nodes, - depth, tree_params.min_rows_per_node, tree_params.split_algo, infogain, - h_parent_hist, h_child_hist, sparsetree, sparsesize, sparse_nodelist, - h_split_colidx, h_split_binidx, d_split_colidx, d_split_binidx, - tempmem); + Ncols, ncols_sampled, nbins, n_unique_labels, n_nodes, depth, + min_rows_per_node, split_algo, infogain, h_parent_hist, h_child_hist, + sparsetree, sparsesize, sparse_nodelist, h_split_colidx, h_split_binidx, + d_split_colidx, d_split_binidx, tempmem); } else { get_best_split_classification( h_histogram, d_histogram, h_colids, d_colids, h_colstart, d_colstart, - Ncols, ncols_sampled, tree_params.n_bins, n_unique_labels, n_nodes, 
- depth, tree_params.min_rows_per_node, tree_params.split_algo, infogain, - h_parent_hist, h_child_hist, sparsetree, sparsesize, sparse_nodelist, - h_split_colidx, h_split_binidx, d_split_colidx, d_split_binidx, - tempmem); + Ncols, ncols_sampled, nbins, n_unique_labels, n_nodes, depth, + min_rows_per_node, split_algo, infogain, h_parent_hist, h_child_hist, + sparsetree, sparsesize, sparse_nodelist, h_split_colidx, h_split_binidx, + d_split_colidx, d_split_binidx, tempmem); } CUDA_CHECK(cudaStreamSynchronize(tempmem->stream)); - leaf_eval_classification( - infogain, depth, tree_params.min_impurity_decrease, tree_params.max_depth, - n_unique_labels, tree_params.max_leaves, h_new_node_flags, sparsetree, - sparsesize, h_parent_hist, n_nodes_nextitr, sparse_nodelist, leaf_cnt); + leaf_eval_classification(infogain, depth, min_impurity_decrease, maxdepth, + n_unique_labels, maxleaves, h_new_node_flags, + sparsetree, sparsesize, h_parent_hist, + n_nodes_nextitr, sparse_nodelist, leaf_cnt); MLCommon::updateDevice(d_new_node_flags, h_new_node_flags, n_nodes, tempmem->stream); - make_level_split(data, nrows, Ncols, ncols_sampled, tree_params.n_bins, - n_nodes, tree_params.split_algo, d_split_colidx, - d_split_binidx, d_new_node_flags, flagsptr, tempmem); + make_level_split(data, nrows, Ncols, ncols_sampled, nbins, n_nodes, + split_algo, d_split_colidx, d_split_binidx, + d_new_node_flags, flagsptr, tempmem); CUDA_CHECK(cudaStreamSynchronize(tempmem->stream)); if (depth != (scatter_algo_depth - 1)) { memcpy(h_parent_hist, h_child_hist, @@ -190,26 +189,26 @@ void grow_deep_tree_classification( sparsetree.resize(sparsetree.size() - lastsize); convert_scatter_to_gather(flagsptr, sample_cnt, n_nodes, nrows, d_nodecount, d_nodestart, d_samplelist, tempmem); - for (int depth = tempmem->swap_depth; - (depth < tree_params.max_depth) && (n_nodes != 0); depth++) { + for (int depth = tempmem->swap_depth; (depth < maxdepth) && (n_nodes != 0); + depth++) { depth_cnt = depth + 1; 
//Algorithm starts here update_feature_sampling(h_colids, d_colids, h_colstart, d_colstart, Ncols, ncols_sampled, lastsize, mtg, dist, feature_selector, tempmem, d_rng); - if (tree_params.split_criterion == ML::CRITERION::GINI) { + if (split_cr == ML::CRITERION::GINI) { best_split_gather_classification( data, labels, d_colids, d_colstart, d_nodestart, d_samplelist, nrows, - Ncols, ncols_sampled, n_unique_labels, tree_params.n_bins, n_nodes, - tree_params.split_algo, sparsetree.size() + lastsize, - tree_params.min_impurity_decrease, tempmem, d_sparsenodes, d_nodelist); + Ncols, ncols_sampled, n_unique_labels, nbins, n_nodes, split_algo, + sparsetree.size() + lastsize, min_impurity_decrease, tempmem, + d_sparsenodes, d_nodelist); } else { best_split_gather_classification( data, labels, d_colids, d_colstart, d_nodestart, d_samplelist, nrows, - Ncols, ncols_sampled, n_unique_labels, tree_params.n_bins, n_nodes, - tree_params.split_algo, sparsetree.size() + lastsize, - tree_params.min_impurity_decrease, tempmem, d_sparsenodes, d_nodelist); + Ncols, ncols_sampled, n_unique_labels, nbins, n_nodes, split_algo, + sparsetree.size() + lastsize, min_impurity_decrease, tempmem, + d_sparsenodes, d_nodelist); } MLCommon::updateHost(h_sparsenodes, d_sparsenodes, lastsize, tempmem->stream); @@ -228,7 +227,7 @@ void grow_deep_tree_classification( n_nodes = h_counter[0]; } if (n_nodes != 0) { - if (tree_params.split_criterion == ML::CRITERION::GINI) { + if (split_cr == ML::CRITERION::GINI) { make_leaf_gather_classification( labels, d_nodestart, d_samplelist, n_unique_labels, d_sparsenodes, d_nodelist, n_nodes, tempmem); diff --git a/cpp/src/decisiontree/levelalgo/levelfunc_regressor.cuh b/cpp/src/decisiontree/levelalgo/levelfunc_regressor.cuh index cd51519d81..7377c63aee 100644 --- a/cpp/src/decisiontree/levelalgo/levelfunc_regressor.cuh +++ b/cpp/src/decisiontree/levelalgo/levelfunc_regressor.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. 
+ * Copyright (c) 2019, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -35,9 +35,11 @@ template void grow_deep_tree_regression( const T* data, const T* labels, unsigned int* rowids, const int Ncols, const float colper, const int n_sampled_rows, const int nrows, - const ML::DecisionTree::DecisionTreeParams& tree_params, int& depth_cnt, - int& leaf_cnt, std::vector>& sparsetree, - const int treeid, std::shared_ptr> tempmem) { + const int nbins, int maxdepth, const int maxleaves, + const int min_rows_per_node, const ML::CRITERION split_cr, int split_algo, + const float min_impurity_decrease, int& depth_cnt, int& leaf_cnt, + std::vector>& sparsetree, const int treeid, + std::shared_ptr> tempmem) { const int ncols_sampled = (int)(colper * Ncols); unsigned int* flagsptr = tempmem->d_flags->data(); unsigned int* sample_cnt = tempmem->d_sample_cnt->data(); @@ -47,14 +49,14 @@ void grow_deep_tree_regression( T mean; T initial_metric; unsigned int count; - if (tree_params.split_criterion == ML::CRITERION::MSE) { + if (split_cr == ML::CRITERION::MSE) { initial_metric_regression(labels, sample_cnt, nrows, mean, count, initial_metric, tempmem); } else { initial_metric_regression(labels, sample_cnt, nrows, mean, count, initial_metric, tempmem); } - int reserve_depth = std::min(tempmem->swap_depth, tree_params.max_depth); + int reserve_depth = std::min(tempmem->swap_depth, maxdepth); size_t total_nodes = pow(2, (reserve_depth + 1)) - 1; std::vector sparse_meanstate; @@ -110,7 +112,7 @@ void grow_deep_tree_regression( std::vector feature_selector(h_colids, h_colids + Ncols); float* infogain = tempmem->h_outgain->data(); - int scatter_algo_depth = std::min(tempmem->swap_depth, tree_params.max_depth); + int scatter_algo_depth = std::min(tempmem->swap_depth, maxdepth); for (int depth = 0; (depth < scatter_algo_depth) && (n_nodes_nextitr != 0); depth++) { depth_cnt = depth + 
1; @@ -128,44 +130,42 @@ void grow_deep_tree_regression( init_parent_value(sparse_meanstate, sparse_countstate, sparse_nodelist, sparsesize, depth, tempmem); - if (tree_params.split_criterion == ML::CRITERION::MSE) { - get_mse_regression_fused(data, labels, flagsptr, sample_cnt, nrows, - Ncols, ncols_sampled, tree_params.n_bins, - n_nodes, tree_params.split_algo, tempmem, - d_mseout, d_predout, d_count); + if (split_cr == ML::CRITERION::MSE) { + get_mse_regression_fused( + data, labels, flagsptr, sample_cnt, nrows, Ncols, ncols_sampled, nbins, + n_nodes, split_algo, tempmem, d_mseout, d_predout, d_count); get_best_split_regression>( h_mseout, d_mseout, h_predout, d_predout, h_count, d_count, h_colids, - d_colids, h_colstart, d_colstart, Ncols, ncols_sampled, - tree_params.n_bins, n_nodes, depth, tree_params.min_rows_per_node, - tree_params.split_algo, sparsesize, infogain, sparse_meanstate, - sparse_countstate, sparsetree, sparse_nodelist, h_split_colidx, - h_split_binidx, d_split_colidx, d_split_binidx, tempmem); + d_colids, h_colstart, d_colstart, Ncols, ncols_sampled, nbins, n_nodes, + depth, min_rows_per_node, split_algo, sparsesize, infogain, + sparse_meanstate, sparse_countstate, sparsetree, sparse_nodelist, + h_split_colidx, h_split_binidx, d_split_colidx, d_split_binidx, + tempmem); } else { get_mse_regression( - data, labels, flagsptr, sample_cnt, nrows, Ncols, ncols_sampled, - tree_params.n_bins, n_nodes, tree_params.split_algo, tempmem, d_mseout, - d_predout, d_count); + data, labels, flagsptr, sample_cnt, nrows, Ncols, ncols_sampled, nbins, + n_nodes, split_algo, tempmem, d_mseout, d_predout, d_count); get_best_split_regression>( h_mseout, d_mseout, h_predout, d_predout, h_count, d_count, h_colids, - d_colids, h_colstart, d_colstart, Ncols, ncols_sampled, - tree_params.n_bins, n_nodes, depth, tree_params.min_rows_per_node, - tree_params.split_algo, sparsesize, infogain, sparse_meanstate, - sparse_countstate, sparsetree, sparse_nodelist, h_split_colidx, 
- h_split_binidx, d_split_colidx, d_split_binidx, tempmem); + d_colids, h_colstart, d_colstart, Ncols, ncols_sampled, nbins, n_nodes, + depth, min_rows_per_node, split_algo, sparsesize, infogain, + sparse_meanstate, sparse_countstate, sparsetree, sparse_nodelist, + h_split_colidx, h_split_binidx, d_split_colidx, d_split_binidx, + tempmem); } CUDA_CHECK(cudaStreamSynchronize(tempmem->stream)); - leaf_eval_regression( - infogain, depth, tree_params.min_impurity_decrease, tree_params.max_depth, - tree_params.max_leaves, h_new_node_flags, sparsetree, sparsesize, - sparse_meanstate, n_nodes_nextitr, sparse_nodelist, leaf_cnt); + leaf_eval_regression(infogain, depth, min_impurity_decrease, maxdepth, + maxleaves, h_new_node_flags, sparsetree, sparsesize, + sparse_meanstate, n_nodes_nextitr, sparse_nodelist, + leaf_cnt); MLCommon::updateDevice(d_new_node_flags, h_new_node_flags, n_nodes, tempmem->stream); - make_level_split(data, nrows, Ncols, ncols_sampled, tree_params.n_bins, - n_nodes, tree_params.split_algo, d_split_colidx, - d_split_binidx, d_new_node_flags, flagsptr, tempmem); + make_level_split(data, nrows, Ncols, ncols_sampled, nbins, n_nodes, + split_algo, d_split_colidx, d_split_binidx, + d_new_node_flags, flagsptr, tempmem); } // Start of gather algorithm @@ -198,8 +198,8 @@ void grow_deep_tree_regression( sparsetree.resize(sparsetree.size() - lastsize); convert_scatter_to_gather(flagsptr, sample_cnt, n_nodes, nrows, d_nodecount, d_nodestart, d_samplelist, tempmem); - for (int depth = tempmem->swap_depth; - (depth < tree_params.max_depth) && (n_nodes != 0); depth++) { + for (int depth = tempmem->swap_depth; (depth < maxdepth) && (n_nodes != 0); + depth++) { depth_cnt = depth + 1; //Algorithm starts here update_feature_sampling(h_colids, d_colids, h_colstart, d_colstart, Ncols, @@ -208,9 +208,9 @@ void grow_deep_tree_regression( best_split_gather_regression( data, labels, d_colids, d_colstart, d_nodestart, d_samplelist, nrows, - Ncols, ncols_sampled, 
tree_params.n_bins, n_nodes, tree_params.split_algo, - tree_params.split_criterion, sparsetree.size() + lastsize, - tree_params.min_impurity_decrease, tempmem, d_sparsenodes, d_nodelist); + Ncols, ncols_sampled, nbins, n_nodes, split_algo, split_cr, + sparsetree.size() + lastsize, min_impurity_decrease, tempmem, + d_sparsenodes, d_nodelist); MLCommon::updateHost(h_sparsenodes, d_sparsenodes, lastsize, tempmem->stream); diff --git a/cpp/src/decisiontree/memory.cuh b/cpp/src/decisiontree/memory.cuh index 8bb69f285e..5b9b8fb2b9 100644 --- a/cpp/src/decisiontree/memory.cuh +++ b/cpp/src/decisiontree/memory.cuh @@ -26,26 +26,35 @@ template TemporaryMemory::TemporaryMemory( const std::shared_ptr device_allocator_in, const std::shared_ptr host_allocator_in, - const cudaStream_t stream_in, int N, int Ncols, int n_unique, - const ML::DecisionTree::DecisionTreeParams& tree_params) { + const cudaStream_t stream_in, int N, int Ncols, float colper, int n_unique, + int n_bins, const int split_algo, int depth, bool col_shuffle) { stream = stream_in; + splitalgo = split_algo; + max_shared_mem = MLCommon::getSharedMemPerBlock(); num_sms = MLCommon::getMultiProcessorCount(); device_allocator = device_allocator_in; host_allocator = host_allocator_in; - LevelMemAllocator(N, Ncols, n_unique, tree_params); + LevelMemAllocator(N, Ncols, colper, n_unique, n_bins, depth, split_algo, + col_shuffle); } template -TemporaryMemory::TemporaryMemory( - const ML::cumlHandle_impl& handle, cudaStream_t stream_in, int N, int Ncols, - int n_unique, const ML::DecisionTree::DecisionTreeParams& tree_params) { +TemporaryMemory::TemporaryMemory(const ML::cumlHandle_impl& handle, + cudaStream_t stream_in, int N, int Ncols, + float colper, int n_unique, int n_bins, + const int split_algo, int depth, + bool col_shuffle) { + //Assign Stream from cumlHandle stream = stream_in; + splitalgo = split_algo; + max_shared_mem = MLCommon::getSharedMemPerBlock(); num_sms = MLCommon::getMultiProcessorCount(); 
device_allocator = handle.getDeviceAllocator(); host_allocator = handle.getHostAllocator(); - LevelMemAllocator(N, Ncols, n_unique, tree_params); + LevelMemAllocator(N, Ncols, colper, n_unique, n_bins, depth, split_algo, + col_shuffle); } template @@ -71,18 +80,18 @@ void TemporaryMemory::print_info(int depth, int nrows, int ncols, } template -void TemporaryMemory::LevelMemAllocator( - int nrows, int ncols, int n_unique, - const ML::DecisionTree::DecisionTreeParams& tree_params) { - int nbins = tree_params.n_bins; - int depth = tree_params.max_depth; +void TemporaryMemory::LevelMemAllocator(int nrows, int ncols, + float colper, int n_unique, + int nbins, int depth, + const int split_algo, + bool col_shuffle) { if (depth > swap_depth || (depth == -1)) { max_nodes_per_level = pow(2, swap_depth); } else { max_nodes_per_level = pow(2, depth); } size_t maxnodes = max_nodes_per_level; - size_t ncols_sampled = (size_t)(ncols * tree_params.max_features); + size_t ncols_sampled = (size_t)(ncols * colper); if (depth < 64) { gather_max_nodes = std::min((size_t)(nrows + 1), (size_t)(pow((size_t)2, (size_t)depth) + 1)); @@ -125,7 +134,7 @@ void TemporaryMemory::LevelMemAllocator( totalmem = 3 * parentsz * sizeof(int) + childsz * sizeof(T) + (nrows + 1) * sizeof(T); - if (tree_params.split_algo == 0) { + if (split_algo == 0) { d_globalminmax = new MLCommon::device_buffer( device_allocator, stream, 2 * maxnodes * ncols_sampled); h_globalminmax = new MLCommon::host_buffer(host_allocator, stream, @@ -140,7 +149,7 @@ void TemporaryMemory::LevelMemAllocator( } d_sample_cnt = new MLCommon::device_buffer(device_allocator, stream, nrows); - if (tree_params.shuffle_features == true) { + if (col_shuffle == true) { d_colids = new MLCommon::device_buffer( device_allocator, stream, ncols_sampled * gather_max_nodes); h_colids = new MLCommon::host_buffer( @@ -242,7 +251,7 @@ void TemporaryMemory::LevelMemAllocator( max_nodes_pred /= 2; // For occupancy purposes. 
max_nodes_mse /= 2; // For occupancy purposes. } - if (tree_params.split_algo == ML::SPLIT_ALGO::HIST) { + if (split_algo == ML::SPLIT_ALGO::HIST) { size_t shmem_per_node = 2 * sizeof(T); max_nodes_minmax = max_shared_mem / shmem_per_node; max_nodes_minmax /= 2; diff --git a/cpp/src/decisiontree/memory.h b/cpp/src/decisiontree/memory.h index e4423ea128..619a350caf 100644 --- a/cpp/src/decisiontree/memory.h +++ b/cpp/src/decisiontree/memory.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,7 +19,6 @@ #include #include #include -#include #include "common/cumlHandle.hpp" template @@ -69,6 +68,8 @@ struct TemporaryMemory { MLCommon::device_buffer *d_colstart = nullptr; MLCommon::host_buffer *h_colids = nullptr; MLCommon::host_buffer *h_colstart = nullptr; + //Split algo + int splitalgo; //For level algorithm MLCommon::device_buffer *d_flags = nullptr; @@ -109,21 +110,17 @@ struct TemporaryMemory { TemporaryMemory( const std::shared_ptr device_allocator_in, const std::shared_ptr host_allocator_in, - const cudaStream_t stream_in, int N, int Ncols, int n_unique, - const ML::DecisionTree::DecisionTreeParams &tree_params); - + const cudaStream_t stream_in, int N, int Ncols, float colper, int n_unique, + int n_bins, const int split_algo, int depth, bool col_shuffle); TemporaryMemory(const ML::cumlHandle_impl &handle, cudaStream_t stream_in, - int N, int Ncols, int n_unique, - const ML::DecisionTree::DecisionTreeParams &tree_params); - + int N, int Ncols, float colper, int n_unique, int n_bins, + const int split_algo, int depth, bool colshuffle); ~TemporaryMemory(); - - void LevelMemAllocator( - int nrows, int ncols, int n_unique, - const ML::DecisionTree::DecisionTreeParams &tree_params); + void LevelMemAllocator(int nrows, int ncols, float colper, int n_unique, + int 
nbins, int depth, const int split_algo, + bool col_shuffle); void LevelMemCleaner(); - void print_info(int depth, int nrows, int ncols, float colper); }; #include "memory.cuh" diff --git a/cpp/src/randomforest/randomforest_impl.cuh b/cpp/src/randomforest/randomforest_impl.cuh index f5add7f02f..91d9f68661 100644 --- a/cpp/src/randomforest/randomforest_impl.cuh +++ b/cpp/src/randomforest/randomforest_impl.cuh @@ -188,8 +188,12 @@ void rfClassifier::fit(const cumlHandle& user_handle, const T* input, std::shared_ptr> tempmem[n_streams]; for (int i = 0; i < n_streams; i++) { tempmem[i] = std::make_shared>( - handle, handle.getInternalStream(i), n_rows, n_cols, n_unique_labels, - this->rf_params.tree_params); + handle, handle.getInternalStream(i), n_rows, n_cols, + this->rf_params.tree_params.max_features, n_unique_labels, + this->rf_params.tree_params.n_bins, + this->rf_params.tree_params.split_algo, + this->rf_params.tree_params.max_depth, + this->rf_params.tree_params.shuffle_features); } //Preprocess once only per forest if ((this->rf_params.tree_params.split_algo == SPLIT_ALGO::GLOBAL_QUANTILE) && @@ -456,8 +460,12 @@ void rfRegressor::fit(const cumlHandle& user_handle, const T* input, std::shared_ptr> tempmem[n_streams]; for (int i = 0; i < n_streams; i++) { tempmem[i] = std::make_shared>( - handle, handle.getInternalStream(i), n_rows, n_cols, 1, - this->rf_params.tree_params); + handle, handle.getInternalStream(i), n_rows, n_cols, + this->rf_params.tree_params.max_features, 1, + this->rf_params.tree_params.n_bins, + this->rf_params.tree_params.split_algo, + this->rf_params.tree_params.max_depth, + this->rf_params.tree_params.shuffle_features); } //Preprocess once only per forest if ((this->rf_params.tree_params.split_algo == SPLIT_ALGO::GLOBAL_QUANTILE) && From 0357a54dc30837a07c2d1f6b992eccbcdc613cdd Mon Sep 17 00:00:00 2001 From: Vinay D Date: Sat, 18 Apr 2020 16:02:50 +0530 Subject: [PATCH 239/330] NOP change to trigger correct set of CI tests --- 
cpp/include/cuml/tree/decisiontree.hpp | 2 +- cpp/src/decisiontree/decisiontree_impl.cuh | 2 +- cpp/src/decisiontree/decisiontree_impl.h | 2 +- cpp/src/decisiontree/levelalgo/levelfunc_classifier.cuh | 2 +- cpp/src/decisiontree/levelalgo/levelfunc_regressor.cuh | 1 + cpp/src/decisiontree/memory.cuh | 1 + cpp/src/decisiontree/memory.h | 1 + cpp/src/randomforest/randomforest_impl.cuh | 1 + 8 files changed, 8 insertions(+), 4 deletions(-) diff --git a/cpp/include/cuml/tree/decisiontree.hpp b/cpp/include/cuml/tree/decisiontree.hpp index a675061037..30ba042b6d 100644 --- a/cpp/include/cuml/tree/decisiontree.hpp +++ b/cpp/include/cuml/tree/decisiontree.hpp @@ -20,7 +20,7 @@ #include "flatnode.h" namespace ML { - +// Changes for triggerring correct set of CI namespace DecisionTree { struct DecisionTreeParams { diff --git a/cpp/src/decisiontree/decisiontree_impl.cuh b/cpp/src/decisiontree/decisiontree_impl.cuh index c816201a85..9698f55fd5 100644 --- a/cpp/src/decisiontree/decisiontree_impl.cuh +++ b/cpp/src/decisiontree/decisiontree_impl.cuh @@ -27,7 +27,7 @@ #include "quantile/quantile.cuh" namespace ML { - +// Changes for triggerring correct set of CI bool is_dev_ptr(const void *p) { cudaPointerAttributes pointer_attr; cudaError_t err = cudaPointerGetAttributes(&pointer_attr, p); diff --git a/cpp/src/decisiontree/decisiontree_impl.h b/cpp/src/decisiontree/decisiontree_impl.h index 2c30028a1e..aa6728a9df 100644 --- a/cpp/src/decisiontree/decisiontree_impl.h +++ b/cpp/src/decisiontree/decisiontree_impl.h @@ -36,7 +36,7 @@ } while (0) namespace ML { - +// Changes for triggerring correct set of CI bool is_dev_ptr(const void *p); namespace DecisionTree { diff --git a/cpp/src/decisiontree/levelalgo/levelfunc_classifier.cuh b/cpp/src/decisiontree/levelalgo/levelfunc_classifier.cuh index 6e9249706b..0641f862fd 100644 --- a/cpp/src/decisiontree/levelalgo/levelfunc_classifier.cuh +++ b/cpp/src/decisiontree/levelalgo/levelfunc_classifier.cuh @@ -14,7 +14,7 @@ * limitations under 
the License. */ #pragma once - +// Changes for triggerring correct set of CI #include #include #include diff --git a/cpp/src/decisiontree/levelalgo/levelfunc_regressor.cuh b/cpp/src/decisiontree/levelalgo/levelfunc_regressor.cuh index 7377c63aee..f2bc0f7066 100644 --- a/cpp/src/decisiontree/levelalgo/levelfunc_regressor.cuh +++ b/cpp/src/decisiontree/levelalgo/levelfunc_regressor.cuh @@ -14,6 +14,7 @@ * limitations under the License. */ #pragma once +// Changes for triggerring correct set of CI #include #include #include diff --git a/cpp/src/decisiontree/memory.cuh b/cpp/src/decisiontree/memory.cuh index 5b9b8fb2b9..0818f43962 100644 --- a/cpp/src/decisiontree/memory.cuh +++ b/cpp/src/decisiontree/memory.cuh @@ -15,6 +15,7 @@ */ #pragma once +// Changes for triggerring correct set of CI #include #include #include diff --git a/cpp/src/decisiontree/memory.h b/cpp/src/decisiontree/memory.h index 619a350caf..511aee6ef2 100644 --- a/cpp/src/decisiontree/memory.h +++ b/cpp/src/decisiontree/memory.h @@ -15,6 +15,7 @@ */ #pragma once +// Changes for triggerring correct set of CI #include #include #include diff --git a/cpp/src/randomforest/randomforest_impl.cuh b/cpp/src/randomforest/randomforest_impl.cuh index 91d9f68661..a30d822c69 100644 --- a/cpp/src/randomforest/randomforest_impl.cuh +++ b/cpp/src/randomforest/randomforest_impl.cuh @@ -25,6 +25,7 @@ #include "score/scores.h" namespace ML { +// Changes for triggerring correct set of CI /** * @brief Construct rf (random forest) object. * @tparam T: data type for input data (float or double). 
From 9ce0cc2f0c8deb18caf8b4ac17f25ff16a77b93c Mon Sep 17 00:00:00 2001 From: Vinay D Date: Sun, 19 Apr 2020 23:05:30 +0530 Subject: [PATCH 240/330] Reintroducing the refactoring changes --- cpp/include/cuml/tree/decisiontree.hpp | 135 ++------------ cpp/src/decisiontree/decisiontree_impl.cuh | 164 ++++++++---------- cpp/src/decisiontree/decisiontree_impl.h | 37 ++-- .../levelalgo/levelfunc_classifier.cuh | 79 +++++---- .../levelalgo/levelfunc_regressor.cuh | 75 ++++---- cpp/src/decisiontree/memory.cuh | 62 +++---- cpp/src/decisiontree/memory.h | 24 +-- cpp/src/randomforest/randomforest_impl.cuh | 83 ++++----- 8 files changed, 249 insertions(+), 410 deletions(-) diff --git a/cpp/include/cuml/tree/decisiontree.hpp b/cpp/include/cuml/tree/decisiontree.hpp index 30ba042b6d..7c9db1b678 100644 --- a/cpp/include/cuml/tree/decisiontree.hpp +++ b/cpp/include/cuml/tree/decisiontree.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,7 +20,7 @@ #include "flatnode.h" namespace ML { -// Changes for triggerring correct set of CI + namespace DecisionTree { struct DecisionTreeParams { @@ -68,23 +68,9 @@ struct DecisionTreeParams { /** * Minimum impurity decrease required for spliting a node. If the impurity decrease is below this value, node is leafed out. Default is 0.0 */ - float min_impurity_decrease; + float min_impurity_decrease = 0.0f; }; -/** - * @brief Set all DecisionTreeParams members. 
- * @param[in,out] params: update with tree parameters - * @param[in] cfg_max_depth: maximum tree depth; default -1 - * @param[in] cfg_max_leaves: maximum leaves; default -1 - * @param[in] cfg_max_features: maximum number of features; default 1.0f - * @param[in] cfg_n_bins: number of bins; default 8 - * @param[in] cfg_split_algo: split algorithm; default SPLIT_ALGO::HIST - * @param[in] cfg_min_rows_per_node: min. rows per node; default 2 - * @param[in] cfg_bootstrap_features: bootstrapping for features; default false - * @param[in] cfg_split_criterion: split criterion; default CRITERION_END, - * i.e., GINI for classification or MSE for regression - * @param[in] cfg_quantile_per_tree: compute quantile per tree; default false - */ void set_tree_params(DecisionTreeParams ¶ms, int cfg_max_depth = -1, int cfg_max_leaves = -1, float cfg_max_features = 1.0f, int cfg_n_bins = 8, int cfg_split_algo = SPLIT_ALGO::HIST, @@ -94,17 +80,7 @@ void set_tree_params(DecisionTreeParams ¶ms, int cfg_max_depth = -1, CRITERION cfg_split_criterion = CRITERION_END, bool cfg_quantile_per_tree = false, bool cfg_shuffle_features = false); - -/** - * @brief Check validity of all decision tree hyper-parameters. - * @param[in] params: decision tree hyper-parameters. - */ void validity_check(const DecisionTreeParams params); - -/** - * @brief Print all decision tree hyper-parameters. - * @param[in] params: decision tree hyper-parameters. - */ void print(const DecisionTreeParams params); template @@ -117,21 +93,9 @@ struct TreeMetaDataNode { std::vector> sparsetree; }; -/** - * @brief Print high-level tree information. - * @tparam T: data type for input data (float or double). - * @tparam L: data type for labels (int type for classification, T type for regression). - * @param[in] tree: CPU pointer to TreeMetaDataNode - */ template void print_tree_summary(const TreeMetaDataNode *tree); -/** - * @brief Print detailed tree information. - * @tparam T: data type for input data (float or double). 
- * @tparam L: data type for labels (int type for classification, T type for regression). - * @param[in] tree: CPU pointer to TreeMetaDataNode - */ template void print_tree(const TreeMetaDataNode *tree); @@ -140,136 +104,59 @@ void print_tree(const TreeMetaDataNode *tree); typedef TreeMetaDataNode TreeClassifierF; typedef TreeMetaDataNode TreeClassifierD; -/** - * @defgroup Decision Tree Classifier - Fit function - * @brief Build (i.e., fit, train) Decision Tree classifier for input data. - * @param[in] handle: cumlHandle - * @param[in, out] tree: CPU pointer to TreeMetaDataNode. User allocated. - * @param[in] data: train data (nrows samples, ncols features) in column major format, - * excluding labels. Device pointer. - * @param[in] ncols: number of features (i.e., columns) excluding target feature. - * @param[in] nrows: number of training data samples of the whole unsampled dataset. - * @param[in] labels: 1D array of target features (int only). One label per training - * sample. Device pointer. - * Assumption: labels need to be preprocessed to map to ascending numbers from 0; - * needed for current gini impl. in decision tree. - * @param[in,out] rowids: array of n_sampled_rows integers in [0, nrows) range. - * Device pointer. The same array is then rearranged when splits are made, - * allowing us to construct trees without rearranging the actual dataset. - * @param[in] n_sampled_rows: number of training samples, after sampling. - * If using decision tree directly over the whole dataset: n_sampled_rows = nrows - * @param[in] n_unique_labels: #unique label values. Number of categories of classification. - * @param[in] tree_params: Decision Tree training hyper parameter struct. 
- * @{ - */ void decisionTreeClassifierFit(const ML::cumlHandle &handle, TreeClassifierF *&tree, float *data, const int ncols, const int nrows, int *labels, unsigned int *rowids, const int n_sampled_rows, int unique_labels, DecisionTree::DecisionTreeParams tree_params); + void decisionTreeClassifierFit(const ML::cumlHandle &handle, TreeClassifierD *&tree, double *data, const int ncols, const int nrows, int *labels, unsigned int *rowids, const int n_sampled_rows, int unique_labels, DecisionTree::DecisionTreeParams tree_params); -/** @} */ -/** - * @defgroup Decision Tree Classifier - Predict function - * @brief Predict target feature for input data; n-ary classification for - * single feature supported. Inference of trees is CPU only for now. - * @param[in] handle: cumlHandle (currently unused; API placeholder) - * @param[in] tree: CPU pointer to TreeMetaDataNode. - * @param[in] rows: test data (n_rows samples, n_cols features) in row major format. - * Current impl. expects a CPU pointer. TODO future API change. - * @param[in] n_rows: number of data samples. - * @param[in] n_cols: number of features (excluding target feature). - * @param[in,out] predictions: n_rows predicted labels. Current impl. expects a - * CPU pointer, user allocated. TODO future API change. - * @param[in] verbosity: verbosity level for logging messages during execution. - * A negative value means to not perform an explicit - * `setLevel()` call, but to continue with the level that - * the caller itself might have set. 
- * @{ - */ void decisionTreeClassifierPredict(const ML::cumlHandle &handle, const TreeClassifierF *tree, const float *rows, const int n_rows, const int n_cols, int *predictions, - int verbosity = -1); + bool verbose = false); + void decisionTreeClassifierPredict(const ML::cumlHandle &handle, const TreeClassifierD *tree, const double *rows, const int n_rows, const int n_cols, int *predictions, - int verbosity = -1); -/** @} */ + bool verbose = false); // ----------------------------- Regression ----------------------------------- // typedef TreeMetaDataNode TreeRegressorF; typedef TreeMetaDataNode TreeRegressorD; -/** - * @defgroup Decision Tree Regressor - Fit function - * @brief Build (i.e., fit, train) Decision Tree regressor for input data. - * @param[in] handle: cumlHandle - * @param[in, out] tree: CPU pointer to TreeMetaDataNode. User allocated. - * @param[in] data: train data (nrows samples, ncols features) in column major format, - * excluding labels. Device pointer. - * @param[in] ncols: number of features (i.e., columns) excluding target feature. - * @param[in] nrows: number of training data samples of the whole unsampled dataset. - * @param[in] labels: 1D array of target features (float or double). One label per - * training sample. Device pointer. - * @param[in,out] rowids: array of n_sampled_rows integers in [0, nrows) range. - * Device pointer. The same array is then rearranged when splits are made, - * allowing us to construct trees without rearranging the actual dataset. - * @param[in] n_sampled_rows: number of training samples, after sampling. If using decision - * tree directly over the whole dataset: n_sampled_rows = nrows - * @param[in] tree_params: Decision Tree training hyper parameter struct. 
- * @{ - */ void decisionTreeRegressorFit(const ML::cumlHandle &handle, TreeRegressorF *&tree, float *data, const int ncols, const int nrows, float *labels, unsigned int *rowids, const int n_sampled_rows, DecisionTree::DecisionTreeParams tree_params); + void decisionTreeRegressorFit(const ML::cumlHandle &handle, TreeRegressorD *&tree, double *data, const int ncols, const int nrows, double *labels, unsigned int *rowids, const int n_sampled_rows, DecisionTree::DecisionTreeParams tree_params); -/** @} */ -/** - * @defgroup Decision Tree Regressor - Predict function - * @brief Predict target feature for input data; regression for single feature supported. - * Inference of trees is CPU only for now. - * @param[in] handle: cumlHandle (currently unused; API placeholder) - * @param[in] tree: CPU pointer to TreeMetaDataNode. - * @param[in] rows: test data (n_rows samples, n_cols features) in row major format. - * Current impl. expects a CPU pointer. TODO future API change. - * @param[in] n_rows: number of data samples. - * @param[in] n_cols: number of features (excluding target feature). - * @param[in,out] predictions: n_rows predicted labels. Current impl. expects a CPU - * pointer, user allocated. TODO future API change. - * @param[in] verbosity: verbosity level for logging messages during execution. - * A negative value means to not perform an explicit - * `setLevel()` call, but to continue with the level that - * the caller itself might have set. 
- * @{ - */ void decisionTreeRegressorPredict(const ML::cumlHandle &handle, const TreeRegressorF *tree, const float *rows, const int n_rows, const int n_cols, - float *predictions, int verbosity = -1); + float *predictions, bool verbose = false); + void decisionTreeRegressorPredict(const ML::cumlHandle &handle, const TreeRegressorD *tree, const double *rows, const int n_rows, const int n_cols, double *predictions, - int verbosity = -1); -/** @} */ + bool verbose = false); } // End namespace DecisionTree } //End namespace ML diff --git a/cpp/src/decisiontree/decisiontree_impl.cuh b/cpp/src/decisiontree/decisiontree_impl.cuh index 9698f55fd5..81422f9fcd 100644 --- a/cpp/src/decisiontree/decisiontree_impl.cuh +++ b/cpp/src/decisiontree/decisiontree_impl.cuh @@ -15,7 +15,6 @@ */ #include -#include #include #include #include @@ -27,7 +26,7 @@ #include "quantile/quantile.cuh" namespace ML { -// Changes for triggerring correct set of CI + bool is_dev_ptr(const void *p) { cudaPointerAttributes pointer_attr; cudaError_t err = cudaPointerGetAttributes(&pointer_attr, p); @@ -57,13 +56,12 @@ void print_node(const std::string &prefix, const std::vector> &sparsetree, int idx, bool isLeft) { const SparseTreeNode &node = sparsetree[idx]; - CUML_LOG_DEBUG(prefix.c_str()); - CUML_LOG_DEBUG(isLeft ? "├" : "└"); + std::cout << prefix; + + std::cout << (isLeft ? 
"├" : "└"); // print the value of the node - std::stringstream ss; - ss << node << std::endl; - CUML_LOG_DEBUG(ss.str().c_str()); + std::cout << node << std::endl; if ((node.colid != -1)) { // enter the next tree level - left and right branch @@ -165,16 +163,18 @@ void build_treelite_tree(TreeBuilderHandle tree_builder, */ template void DecisionTreeBase::print_tree_summary() const { - PatternSetter _("%v"); - CUML_LOG_DEBUG(" Decision Tree depth --> %d and n_leaves --> %d", - depth_counter, leaf_counter); - CUML_LOG_DEBUG(" Total temporary memory usage--> %lf MB", - ((double)total_temp_mem / (1024 * 1024))); - CUML_LOG_DEBUG(" Shared memory used --> %d B", shmem_used); - CUML_LOG_DEBUG(" Tree Fitting - Overall time --> %lf s", - prepare_time + train_time); - CUML_LOG_DEBUG(" - preparing for fit time: %lf s", prepare_time); - CUML_LOG_DEBUG(" - tree growing time: %lf s", train_time); + std::cout << " Decision Tree depth --> " << depth_counter + << " and n_leaves --> " << leaf_counter << std::endl; + std::cout << " Total temporary memory usage--> " + << ((double)total_temp_mem / (1024 * 1024)) << " MB" << std::endl; + std::cout << " Shared memory used --> " << shmem_used << " bytes " + << std::endl; + std::cout << " Tree Fitting - Overall time --> " << prepare_time + train_time + << " seconds" << std::endl; + std::cout << " - preparing for fit time: " << prepare_time << " seconds" + << std::endl; + std::cout << " - tree growing time: " << train_time << " seconds" + << std::endl; } /** @@ -203,43 +203,28 @@ void DecisionTreeBase::print( * @param[in] n_sampled_rows: Number of rows after subsampling * @param[in] unique_labels: Number of unique classes for calssification. Its set to 1 for regression * @param[in] treeid: Tree id in case of building multiple tree from RF. - * @param[in] n_bins: Number of split bins for every node. - * @param[in] split_algo_flag: Split algo used. 
MinMax / Quantile - * @param[in] cfg_min_rows_per_rows: Minimum number of rows to consider before split evaluation - * @param[in] cfg_bootstrap_features: If features need to be bootstarpped. - * @param[in] cfg_split_criterion: Split criteria to be used. GINI, ENTROPY, MSE, MAE - * @param[in] quantile_per_tree: If per tree quantile needs to be built. */ template void DecisionTreeBase::plant( std::vector> &sparsetree, const T *data, const int ncols, const int nrows, const L *labels, unsigned int *rowids, - const int n_sampled_rows, int unique_labels, const int treeid, int maxdepth, - int max_leaf_nodes, const float colper, int n_bins, int split_algo_flag, - int cfg_min_rows_per_node, bool cfg_bootstrap_features, - CRITERION cfg_split_criterion, bool quantile_per_tree) { - split_algo = split_algo_flag; + const int n_sampled_rows, int unique_labels, const int treeid) { dinfo.NLocalrows = nrows; dinfo.NGlobalrows = nrows; dinfo.Ncols = ncols; - nbins = n_bins; - treedepth = maxdepth; - maxleaves = max_leaf_nodes; n_unique_labels = unique_labels; - min_rows_per_node = cfg_min_rows_per_node; - bootstrap_features = cfg_bootstrap_features; - split_criterion = cfg_split_criterion; - if (split_algo == SPLIT_ALGO::GLOBAL_QUANTILE && quantile_per_tree) { + if (tree_params.split_algo == SPLIT_ALGO::GLOBAL_QUANTILE && + tree_params.quantile_per_tree) { preprocess_quantile(data, rowids, n_sampled_rows, ncols, dinfo.NLocalrows, - n_bins, tempmem); + tree_params.n_bins, tempmem); } CUDA_CHECK(cudaStreamSynchronize( tempmem->stream)); // added to ensure accurate measurement //Bootstrap features unsigned int *h_colids = tempmem->h_colids->data(); - if (bootstrap_features) { + if (tree_params.bootstrap_features) { srand(treeid * 1000); for (int i = 0; i < dinfo.Ncols; i++) { h_colids[i] = rand() % dinfo.Ncols; @@ -251,19 +236,18 @@ void DecisionTreeBase::plant( total_temp_mem = tempmem->totalmem; MLCommon::TimerCPU timer; - grow_deep_tree(data, labels, rowids, n_sampled_rows, ncols, 
colper, - dinfo.NLocalrows, sparsetree, treeid, tempmem); + grow_deep_tree(data, labels, rowids, n_sampled_rows, ncols, + tree_params.max_features, dinfo.NLocalrows, sparsetree, treeid, + tempmem); train_time = timer.getElapsedSeconds(); } + template void DecisionTreeBase::predict(const ML::cumlHandle &handle, const TreeMetaDataNode *tree, const T *rows, const int n_rows, const int n_cols, L *predictions, - int verbosity) const { - if (verbosity >= 0) { - ML::Logger::get().setLevel(verbosity); - } + bool verbose) const { ASSERT(!is_dev_ptr(rows) && !is_dev_ptr(predictions), "DT Error: Current impl. expects both input and predictions to be CPU " "pointers.\n"); @@ -274,37 +258,45 @@ void DecisionTreeBase::predict(const ML::cumlHandle &handle, ASSERT((n_rows > 0), "Invalid n_rows %d", n_rows); ASSERT((n_cols > 0), "Invalid n_cols %d", n_cols); - predict_all(tree, rows, n_rows, n_cols, predictions); + predict_all(tree, rows, n_rows, n_cols, predictions, verbose); } template void DecisionTreeBase::predict_all(const TreeMetaDataNode *tree, const T *rows, const int n_rows, - const int n_cols, L *preds) const { + const int n_cols, L *preds, + bool verbose) const { for (int row_id = 0; row_id < n_rows; row_id++) { - preds[row_id] = predict_one(&rows[row_id * n_cols], tree->sparsetree, 0); + preds[row_id] = + predict_one(&rows[row_id * n_cols], tree->sparsetree, 0, verbose); } } template L DecisionTreeBase::predict_one( - const T *row, const std::vector> sparsetree, - int idx) const { + const T *row, const std::vector> sparsetree, int idx, + bool verbose) const { int colid = sparsetree[idx].colid; T quesval = sparsetree[idx].quesval; int leftchild = sparsetree[idx].left_child_id; if (colid == -1) { - CUML_LOG_DEBUG("Leaf node. Predicting %f", - (float)sparsetree[idx].prediction); + if (verbose) { + std::cout << "Leaf node. 
Predicting " << sparsetree[idx].prediction + << std::endl; + } return sparsetree[idx].prediction; } else if (row[colid] <= quesval) { - CUML_LOG_DEBUG("Classifying Left @ node w/ column %d and value %f", colid, - (float)quesval); - return predict_one(row, sparsetree, leftchild); + if (verbose) { + std::cout << "Classifying Left @ node w/ column " << colid + << " and value " << quesval << std::endl; + } + return predict_one(row, sparsetree, leftchild, verbose); } else { - CUML_LOG_DEBUG("Classifying Right @ node w/ column %d and value %f", colid, - (float)quesval); - return predict_one(row, sparsetree, leftchild + 1); + if (verbose) { + std::cout << "Classifying Right @ node w/ column " << colid + << " and value " << quesval << std::endl; + } + return predict_one(row, sparsetree, leftchild + 1, verbose); } } @@ -323,7 +315,7 @@ void DecisionTreeBase::base_fit( const cudaStream_t stream_in, const T *data, const int ncols, const int nrows, const L *labels, unsigned int *rowids, const int n_sampled_rows, int unique_labels, std::vector> &sparsetree, - const int treeid, DecisionTreeParams &tree_params, bool is_classifier, + const int treeid, bool is_classifier, std::shared_ptr> in_tempmem) { prepare_fit_timer.reset(); const char *CRITERION_NAME[] = {"GINI", "ENTROPY", "MSE", "MAE", "END"}; @@ -334,8 +326,8 @@ void DecisionTreeBase::base_fit( validity_check(tree_params); if (tree_params.n_bins > n_sampled_rows) { - CUML_LOG_WARN("Calling with number of bins > number of rows!"); - CUML_LOG_WARN("Resetting n_bins to %d.", n_sampled_rows); + std::cout << "Warning! Calling with number of bins > number of rows! "; + std::cout << "Resetting n_bins to " << n_sampled_rows << "." 
<< std::endl; tree_params.n_bins = n_sampled_rows; } @@ -355,17 +347,12 @@ void DecisionTreeBase::base_fit( } else { tempmem = std::make_shared>( device_allocator_in, host_allocator_in, stream_in, nrows, ncols, - tree_params.max_features, unique_labels, tree_params.n_bins, - tree_params.split_algo, tree_params.max_depth, - tree_params.shuffle_features); + unique_labels, tree_params); tree_params.quantile_per_tree = true; } plant(sparsetree, data, ncols, nrows, labels, rowids, n_sampled_rows, - unique_labels, treeid, tree_params.max_depth, tree_params.max_leaves, - tree_params.max_features, tree_params.n_bins, tree_params.split_algo, - tree_params.min_rows_per_node, tree_params.bootstrap_features, - tree_params.split_criterion, tree_params.quantile_per_tree); + unique_labels, treeid); if (in_tempmem == nullptr) { tempmem.reset(); } @@ -376,13 +363,14 @@ void DecisionTreeClassifier::fit( const ML::cumlHandle &handle, const T *data, const int ncols, const int nrows, const int *labels, unsigned int *rowids, const int n_sampled_rows, const int unique_labels, TreeMetaDataNode *&tree, - DecisionTreeParams tree_params, + DecisionTreeParams tree_parameters, std::shared_ptr> in_tempmem) { + this->tree_params = tree_parameters; this->base_fit(handle.getImpl().getDeviceAllocator(), handle.getImpl().getHostAllocator(), handle.getImpl().getStream(), data, ncols, nrows, labels, rowids, n_sampled_rows, unique_labels, tree->sparsetree, - tree->treeid, tree_params, true, in_tempmem); + tree->treeid, true, in_tempmem); this->set_metadata(tree); } @@ -394,11 +382,12 @@ void DecisionTreeClassifier::fit( const cudaStream_t stream_in, const T *data, const int ncols, const int nrows, const int *labels, unsigned int *rowids, const int n_sampled_rows, const int unique_labels, TreeMetaDataNode *&tree, - DecisionTreeParams tree_params, + DecisionTreeParams tree_parameters, std::shared_ptr> in_tempmem) { + this->tree_params = tree_parameters; this->base_fit(device_allocator_in, 
host_allocator_in, stream_in, data, ncols, nrows, labels, rowids, n_sampled_rows, unique_labels, - tree->sparsetree, tree->treeid, tree_params, true, in_tempmem); + tree->sparsetree, tree->treeid, true, in_tempmem); this->set_metadata(tree); } @@ -406,13 +395,13 @@ template void DecisionTreeRegressor::fit( const ML::cumlHandle &handle, const T *data, const int ncols, const int nrows, const T *labels, unsigned int *rowids, const int n_sampled_rows, - TreeMetaDataNode *&tree, DecisionTreeParams tree_params, + TreeMetaDataNode *&tree, DecisionTreeParams tree_parameters, std::shared_ptr> in_tempmem) { - this->base_fit(handle.getImpl().getDeviceAllocator(), - handle.getImpl().getHostAllocator(), - handle.getImpl().getStream(), data, ncols, nrows, labels, - rowids, n_sampled_rows, 1, tree->sparsetree, tree->treeid, - tree_params, false, in_tempmem); + this->tree_params = tree_parameters; + this->base_fit( + handle.getImpl().getDeviceAllocator(), handle.getImpl().getHostAllocator(), + handle.getImpl().getStream(), data, ncols, nrows, labels, rowids, + n_sampled_rows, 1, tree->sparsetree, tree->treeid, false, in_tempmem); this->set_metadata(tree); } @@ -422,11 +411,12 @@ void DecisionTreeRegressor::fit( const std::shared_ptr host_allocator_in, const cudaStream_t stream_in, const T *data, const int ncols, const int nrows, const T *labels, unsigned int *rowids, const int n_sampled_rows, - TreeMetaDataNode *&tree, DecisionTreeParams tree_params, + TreeMetaDataNode *&tree, DecisionTreeParams tree_parameters, std::shared_ptr> in_tempmem) { + this->tree_params = tree_parameters; this->base_fit(device_allocator_in, host_allocator_in, stream_in, data, ncols, nrows, labels, rowids, n_sampled_rows, 1, tree->sparsetree, - tree->treeid, tree_params, false, in_tempmem); + tree->treeid, false, in_tempmem); this->set_metadata(tree); } @@ -438,12 +428,10 @@ void DecisionTreeClassifier::grow_deep_tree( const int treeid, std::shared_ptr> tempmem) { int leaf_cnt = 0; int depth_cnt = 0; - 
grow_deep_tree_classification( - data, labels, rowids, ncols, colper, n_sampled_rows, nrows, - this->n_unique_labels, this->nbins, this->treedepth, this->maxleaves, - this->min_rows_per_node, this->split_criterion, this->split_algo, - this->min_impurity_decrease, depth_cnt, leaf_cnt, sparsetree, treeid, - tempmem); + grow_deep_tree_classification(data, labels, rowids, ncols, colper, + n_sampled_rows, nrows, this->n_unique_labels, + this->tree_params, depth_cnt, leaf_cnt, + sparsetree, treeid, tempmem); this->depth_counter = depth_cnt; this->leaf_counter = leaf_cnt; } @@ -456,11 +444,9 @@ void DecisionTreeRegressor::grow_deep_tree( const int treeid, std::shared_ptr> tempmem) { int leaf_cnt = 0; int depth_cnt = 0; - grow_deep_tree_regression( - data, labels, rowids, ncols, colper, n_sampled_rows, nrows, this->nbins, - this->treedepth, this->maxleaves, this->min_rows_per_node, - this->split_criterion, this->split_algo, this->min_impurity_decrease, - depth_cnt, leaf_cnt, sparsetree, treeid, tempmem); + grow_deep_tree_regression(data, labels, rowids, ncols, colper, n_sampled_rows, + nrows, this->tree_params, depth_cnt, leaf_cnt, + sparsetree, treeid, tempmem); this->depth_counter = depth_cnt; this->leaf_counter = leaf_cnt; } diff --git a/cpp/src/decisiontree/decisiontree_impl.h b/cpp/src/decisiontree/decisiontree_impl.h index aa6728a9df..576a1951c5 100644 --- a/cpp/src/decisiontree/decisiontree_impl.h +++ b/cpp/src/decisiontree/decisiontree_impl.h @@ -1,3 +1,4 @@ + /* * Copyright (c) 2019-2020, NVIDIA CORPORATION. 
* @@ -36,7 +37,7 @@ } while (0) namespace ML { -// Changes for triggerring correct set of CI + bool is_dev_ptr(const void *p); namespace DecisionTree { @@ -63,12 +64,8 @@ struct DataInfo { template class DecisionTreeBase { protected: - int split_algo; - int nbins; DataInfo dinfo; - int treedepth; int depth_counter = 0; - int maxleaves; int leaf_counter = 0; std::shared_ptr> tempmem; size_t total_temp_mem; @@ -78,19 +75,13 @@ class DecisionTreeBase { int n_unique_labels = -1; // number of unique labels in dataset double prepare_time = 0; double train_time = 0; - int min_rows_per_node; - bool bootstrap_features; - CRITERION split_criterion; MLCommon::TimerCPU prepare_fit_timer; - float min_impurity_decrease = 0.0; + DecisionTreeParams tree_params; void plant(std::vector> &sparsetree, const T *data, const int ncols, const int nrows, const L *labels, unsigned int *rowids, const int n_sampled_rows, int unique_labels, - const int treeid, int maxdepth, int max_leaf_nodes, - const float colper, int n_bins, int split_algo_flag, - int cfg_min_rows_per_node, bool cfg_bootstrap_features, - CRITERION cfg_split_criterion, bool cfg_quantile_per_tree); + const int treeid); virtual void grow_deep_tree( const T *data, const L *labels, unsigned int *rowids, @@ -105,8 +96,7 @@ class DecisionTreeBase { const int nrows, const L *labels, unsigned int *rowids, const int n_sampled_rows, int unique_labels, std::vector> &sparsetree, const int treeid, - DecisionTreeParams &tree_params, bool is_classifier, - std::shared_ptr> in_tempmem); + bool is_classifier, std::shared_ptr> in_tempmem); public: // Printing utility for high level tree info. @@ -118,12 +108,13 @@ class DecisionTreeBase { // Predict labels for n_rows rows, with n_cols features each, for a given tree. rows in row-major format. 
void predict(const ML::cumlHandle &handle, const TreeMetaDataNode *tree, const T *rows, const int n_rows, const int n_cols, - L *predictions, int verbosity = -1) const; + L *predictions, bool verbose = false) const; void predict_all(const TreeMetaDataNode *tree, const T *rows, - const int n_rows, const int n_cols, L *preds) const; + const int n_rows, const int n_cols, L *preds, + bool verbose = false) const; L predict_one(const T *row, - const std::vector> sparsetree, - int idx) const; + const std::vector> sparsetree, int idx, + bool verbose = false) const; void set_metadata(TreeMetaDataNode *&tree); @@ -138,7 +129,7 @@ class DecisionTreeClassifier : public DecisionTreeBase { void fit(const ML::cumlHandle &handle, const T *data, const int ncols, const int nrows, const int *labels, unsigned int *rowids, const int n_sampled_rows, const int unique_labels, - TreeMetaDataNode *&tree, DecisionTreeParams tree_params, + TreeMetaDataNode *&tree, DecisionTreeParams tree_parameters, std::shared_ptr> in_tempmem = nullptr); //This fit fucntion does not take handle , used by RF @@ -147,7 +138,7 @@ class DecisionTreeClassifier : public DecisionTreeBase { const cudaStream_t stream_in, const T *data, const int ncols, const int nrows, const int *labels, unsigned int *rowids, const int n_sampled_rows, const int unique_labels, - TreeMetaDataNode *&tree, DecisionTreeParams tree_params, + TreeMetaDataNode *&tree, DecisionTreeParams tree_parameters, std::shared_ptr> in_tempmem); private: @@ -166,7 +157,7 @@ class DecisionTreeRegressor : public DecisionTreeBase { void fit(const ML::cumlHandle &handle, const T *data, const int ncols, const int nrows, const T *labels, unsigned int *rowids, const int n_sampled_rows, TreeMetaDataNode *&tree, - DecisionTreeParams tree_params, + DecisionTreeParams tree_parameters, std::shared_ptr> in_tempmem = nullptr); //This fit function does not take handle. 
Used by RF @@ -175,7 +166,7 @@ class DecisionTreeRegressor : public DecisionTreeBase { const cudaStream_t stream_in, const T *data, const int ncols, const int nrows, const T *labels, unsigned int *rowids, const int n_sampled_rows, TreeMetaDataNode *&tree, - DecisionTreeParams tree_params, + DecisionTreeParams tree_parameters, std::shared_ptr> in_tempmem); private: diff --git a/cpp/src/decisiontree/levelalgo/levelfunc_classifier.cuh b/cpp/src/decisiontree/levelalgo/levelfunc_classifier.cuh index 0641f862fd..27634ae4dc 100644 --- a/cpp/src/decisiontree/levelalgo/levelfunc_classifier.cuh +++ b/cpp/src/decisiontree/levelalgo/levelfunc_classifier.cuh @@ -14,9 +14,7 @@ * limitations under the License. */ #pragma once -// Changes for triggerring correct set of CI #include -#include #include #include #include @@ -36,12 +34,10 @@ template void grow_deep_tree_classification( const T* data, const int* labels, unsigned int* rowids, const int Ncols, const float colper, int n_sampled_rows, const int nrows, - const int n_unique_labels, const int nbins, const int maxdepth, - const int maxleaves, const int min_rows_per_node, - const ML::CRITERION split_cr, const int split_algo, - const float min_impurity_decrease, int& depth_cnt, int& leaf_cnt, - std::vector>& sparsetree, const int treeid, - std::shared_ptr> tempmem) { + const int n_unique_labels, + const ML::DecisionTree::DecisionTreeParams& tree_params, int& depth_cnt, + int& leaf_cnt, std::vector>& sparsetree, + const int treeid, std::shared_ptr> tempmem) { const int ncols_sampled = (int)(colper * Ncols); unsigned int* flagsptr = tempmem->d_flags->data(); unsigned int* sample_cnt = tempmem->d_sample_cnt->data(); @@ -49,7 +45,7 @@ void grow_deep_tree_classification( tempmem->stream); std::vector histvec(n_unique_labels, 0); T initial_metric; - if (split_cr == ML::CRITERION::GINI) { + if (tree_params.split_criterion == ML::CRITERION::GINI) { initial_metric_classification(labels, sample_cnt, nrows, n_unique_labels, histvec, 
initial_metric, tempmem); @@ -58,7 +54,7 @@ void grow_deep_tree_classification( n_unique_labels, histvec, initial_metric, tempmem); } - int reserve_depth = std::min(tempmem->swap_depth, maxdepth); + int reserve_depth = std::min(tempmem->swap_depth, tree_params.max_depth); size_t total_nodes = pow(2, (reserve_depth + 1)) - 1; unsigned int* h_parent_hist = tempmem->h_parent_hist->data(); @@ -106,7 +102,7 @@ void grow_deep_tree_classification( } std::vector feature_selector(h_colids, h_colids + Ncols); - int scatter_algo_depth = std::min(tempmem->swap_depth, maxdepth); + int scatter_algo_depth = std::min(tempmem->swap_depth, tree_params.max_depth); for (int depth = 0; (depth < scatter_algo_depth) && (n_nodes_nextitr != 0); depth++) { depth_cnt = depth + 1; @@ -122,37 +118,40 @@ void grow_deep_tree_classification( ncols_sampled, n_nodes, mtg, dist, feature_selector, tempmem, d_rng); get_histogram_classification(data, labels, flagsptr, sample_cnt, nrows, - Ncols, ncols_sampled, n_unique_labels, nbins, - n_nodes, split_algo, tempmem, d_histogram); + Ncols, ncols_sampled, n_unique_labels, + tree_params.n_bins, n_nodes, + tree_params.split_algo, tempmem, d_histogram); float* infogain = tempmem->h_outgain->data(); - if (split_cr == ML::CRITERION::GINI) { + if (tree_params.split_criterion == ML::CRITERION::GINI) { get_best_split_classification( h_histogram, d_histogram, h_colids, d_colids, h_colstart, d_colstart, - Ncols, ncols_sampled, nbins, n_unique_labels, n_nodes, depth, - min_rows_per_node, split_algo, infogain, h_parent_hist, h_child_hist, - sparsetree, sparsesize, sparse_nodelist, h_split_colidx, h_split_binidx, - d_split_colidx, d_split_binidx, tempmem); + Ncols, ncols_sampled, tree_params.n_bins, n_unique_labels, n_nodes, + depth, tree_params.min_rows_per_node, tree_params.split_algo, infogain, + h_parent_hist, h_child_hist, sparsetree, sparsesize, sparse_nodelist, + h_split_colidx, h_split_binidx, d_split_colidx, d_split_binidx, + tempmem); } else { 
get_best_split_classification( h_histogram, d_histogram, h_colids, d_colids, h_colstart, d_colstart, - Ncols, ncols_sampled, nbins, n_unique_labels, n_nodes, depth, - min_rows_per_node, split_algo, infogain, h_parent_hist, h_child_hist, - sparsetree, sparsesize, sparse_nodelist, h_split_colidx, h_split_binidx, - d_split_colidx, d_split_binidx, tempmem); + Ncols, ncols_sampled, tree_params.n_bins, n_unique_labels, n_nodes, + depth, tree_params.min_rows_per_node, tree_params.split_algo, infogain, + h_parent_hist, h_child_hist, sparsetree, sparsesize, sparse_nodelist, + h_split_colidx, h_split_binidx, d_split_colidx, d_split_binidx, + tempmem); } CUDA_CHECK(cudaStreamSynchronize(tempmem->stream)); - leaf_eval_classification(infogain, depth, min_impurity_decrease, maxdepth, - n_unique_labels, maxleaves, h_new_node_flags, - sparsetree, sparsesize, h_parent_hist, - n_nodes_nextitr, sparse_nodelist, leaf_cnt); + leaf_eval_classification( + infogain, depth, tree_params.min_impurity_decrease, tree_params.max_depth, + n_unique_labels, tree_params.max_leaves, h_new_node_flags, sparsetree, + sparsesize, h_parent_hist, n_nodes_nextitr, sparse_nodelist, leaf_cnt); MLCommon::updateDevice(d_new_node_flags, h_new_node_flags, n_nodes, tempmem->stream); - make_level_split(data, nrows, Ncols, ncols_sampled, nbins, n_nodes, - split_algo, d_split_colidx, d_split_binidx, - d_new_node_flags, flagsptr, tempmem); + make_level_split(data, nrows, Ncols, ncols_sampled, tree_params.n_bins, + n_nodes, tree_params.split_algo, d_split_colidx, + d_split_binidx, d_new_node_flags, flagsptr, tempmem); CUDA_CHECK(cudaStreamSynchronize(tempmem->stream)); if (depth != (scatter_algo_depth - 1)) { memcpy(h_parent_hist, h_child_hist, @@ -161,7 +160,7 @@ void grow_deep_tree_classification( } // Start of gather algorithm //Convertor - CUML_LOG_DEBUG("begin gather "); + //std::cout << "begin gather \n"; int lastsize = sparsetree.size() - sparsesize_nextitr; n_nodes = n_nodes_nextitr; if (n_nodes == 0) return; 
@@ -189,26 +188,26 @@ void grow_deep_tree_classification( sparsetree.resize(sparsetree.size() - lastsize); convert_scatter_to_gather(flagsptr, sample_cnt, n_nodes, nrows, d_nodecount, d_nodestart, d_samplelist, tempmem); - for (int depth = tempmem->swap_depth; (depth < maxdepth) && (n_nodes != 0); - depth++) { + for (int depth = tempmem->swap_depth; + (depth < tree_params.max_depth) && (n_nodes != 0); depth++) { depth_cnt = depth + 1; //Algorithm starts here update_feature_sampling(h_colids, d_colids, h_colstart, d_colstart, Ncols, ncols_sampled, lastsize, mtg, dist, feature_selector, tempmem, d_rng); - if (split_cr == ML::CRITERION::GINI) { + if (tree_params.split_criterion == ML::CRITERION::GINI) { best_split_gather_classification( data, labels, d_colids, d_colstart, d_nodestart, d_samplelist, nrows, - Ncols, ncols_sampled, n_unique_labels, nbins, n_nodes, split_algo, - sparsetree.size() + lastsize, min_impurity_decrease, tempmem, - d_sparsenodes, d_nodelist); + Ncols, ncols_sampled, n_unique_labels, tree_params.n_bins, n_nodes, + tree_params.split_algo, sparsetree.size() + lastsize, + tree_params.min_impurity_decrease, tempmem, d_sparsenodes, d_nodelist); } else { best_split_gather_classification( data, labels, d_colids, d_colstart, d_nodestart, d_samplelist, nrows, - Ncols, ncols_sampled, n_unique_labels, nbins, n_nodes, split_algo, - sparsetree.size() + lastsize, min_impurity_decrease, tempmem, - d_sparsenodes, d_nodelist); + Ncols, ncols_sampled, n_unique_labels, tree_params.n_bins, n_nodes, + tree_params.split_algo, sparsetree.size() + lastsize, + tree_params.min_impurity_decrease, tempmem, d_sparsenodes, d_nodelist); } MLCommon::updateHost(h_sparsenodes, d_sparsenodes, lastsize, tempmem->stream); @@ -227,7 +226,7 @@ void grow_deep_tree_classification( n_nodes = h_counter[0]; } if (n_nodes != 0) { - if (split_cr == ML::CRITERION::GINI) { + if (tree_params.split_criterion == ML::CRITERION::GINI) { make_leaf_gather_classification( labels, d_nodestart, 
d_samplelist, n_unique_labels, d_sparsenodes, d_nodelist, n_nodes, tempmem); diff --git a/cpp/src/decisiontree/levelalgo/levelfunc_regressor.cuh b/cpp/src/decisiontree/levelalgo/levelfunc_regressor.cuh index f2bc0f7066..cd51519d81 100644 --- a/cpp/src/decisiontree/levelalgo/levelfunc_regressor.cuh +++ b/cpp/src/decisiontree/levelalgo/levelfunc_regressor.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2019-2020, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,7 +14,6 @@ * limitations under the License. */ #pragma once -// Changes for triggerring correct set of CI #include #include #include @@ -36,11 +35,9 @@ template void grow_deep_tree_regression( const T* data, const T* labels, unsigned int* rowids, const int Ncols, const float colper, const int n_sampled_rows, const int nrows, - const int nbins, int maxdepth, const int maxleaves, - const int min_rows_per_node, const ML::CRITERION split_cr, int split_algo, - const float min_impurity_decrease, int& depth_cnt, int& leaf_cnt, - std::vector>& sparsetree, const int treeid, - std::shared_ptr> tempmem) { + const ML::DecisionTree::DecisionTreeParams& tree_params, int& depth_cnt, + int& leaf_cnt, std::vector>& sparsetree, + const int treeid, std::shared_ptr> tempmem) { const int ncols_sampled = (int)(colper * Ncols); unsigned int* flagsptr = tempmem->d_flags->data(); unsigned int* sample_cnt = tempmem->d_sample_cnt->data(); @@ -50,14 +47,14 @@ void grow_deep_tree_regression( T mean; T initial_metric; unsigned int count; - if (split_cr == ML::CRITERION::MSE) { + if (tree_params.split_criterion == ML::CRITERION::MSE) { initial_metric_regression(labels, sample_cnt, nrows, mean, count, initial_metric, tempmem); } else { initial_metric_regression(labels, sample_cnt, nrows, mean, count, initial_metric, tempmem); } - int reserve_depth = std::min(tempmem->swap_depth, maxdepth); + 
int reserve_depth = std::min(tempmem->swap_depth, tree_params.max_depth); size_t total_nodes = pow(2, (reserve_depth + 1)) - 1; std::vector sparse_meanstate; @@ -113,7 +110,7 @@ void grow_deep_tree_regression( std::vector feature_selector(h_colids, h_colids + Ncols); float* infogain = tempmem->h_outgain->data(); - int scatter_algo_depth = std::min(tempmem->swap_depth, maxdepth); + int scatter_algo_depth = std::min(tempmem->swap_depth, tree_params.max_depth); for (int depth = 0; (depth < scatter_algo_depth) && (n_nodes_nextitr != 0); depth++) { depth_cnt = depth + 1; @@ -131,42 +128,44 @@ void grow_deep_tree_regression( init_parent_value(sparse_meanstate, sparse_countstate, sparse_nodelist, sparsesize, depth, tempmem); - if (split_cr == ML::CRITERION::MSE) { - get_mse_regression_fused( - data, labels, flagsptr, sample_cnt, nrows, Ncols, ncols_sampled, nbins, - n_nodes, split_algo, tempmem, d_mseout, d_predout, d_count); + if (tree_params.split_criterion == ML::CRITERION::MSE) { + get_mse_regression_fused(data, labels, flagsptr, sample_cnt, nrows, + Ncols, ncols_sampled, tree_params.n_bins, + n_nodes, tree_params.split_algo, tempmem, + d_mseout, d_predout, d_count); get_best_split_regression>( h_mseout, d_mseout, h_predout, d_predout, h_count, d_count, h_colids, - d_colids, h_colstart, d_colstart, Ncols, ncols_sampled, nbins, n_nodes, - depth, min_rows_per_node, split_algo, sparsesize, infogain, - sparse_meanstate, sparse_countstate, sparsetree, sparse_nodelist, - h_split_colidx, h_split_binidx, d_split_colidx, d_split_binidx, - tempmem); + d_colids, h_colstart, d_colstart, Ncols, ncols_sampled, + tree_params.n_bins, n_nodes, depth, tree_params.min_rows_per_node, + tree_params.split_algo, sparsesize, infogain, sparse_meanstate, + sparse_countstate, sparsetree, sparse_nodelist, h_split_colidx, + h_split_binidx, d_split_colidx, d_split_binidx, tempmem); } else { get_mse_regression( - data, labels, flagsptr, sample_cnt, nrows, Ncols, ncols_sampled, nbins, - n_nodes, 
split_algo, tempmem, d_mseout, d_predout, d_count); + data, labels, flagsptr, sample_cnt, nrows, Ncols, ncols_sampled, + tree_params.n_bins, n_nodes, tree_params.split_algo, tempmem, d_mseout, + d_predout, d_count); get_best_split_regression>( h_mseout, d_mseout, h_predout, d_predout, h_count, d_count, h_colids, - d_colids, h_colstart, d_colstart, Ncols, ncols_sampled, nbins, n_nodes, - depth, min_rows_per_node, split_algo, sparsesize, infogain, - sparse_meanstate, sparse_countstate, sparsetree, sparse_nodelist, - h_split_colidx, h_split_binidx, d_split_colidx, d_split_binidx, - tempmem); + d_colids, h_colstart, d_colstart, Ncols, ncols_sampled, + tree_params.n_bins, n_nodes, depth, tree_params.min_rows_per_node, + tree_params.split_algo, sparsesize, infogain, sparse_meanstate, + sparse_countstate, sparsetree, sparse_nodelist, h_split_colidx, + h_split_binidx, d_split_colidx, d_split_binidx, tempmem); } CUDA_CHECK(cudaStreamSynchronize(tempmem->stream)); - leaf_eval_regression(infogain, depth, min_impurity_decrease, maxdepth, - maxleaves, h_new_node_flags, sparsetree, sparsesize, - sparse_meanstate, n_nodes_nextitr, sparse_nodelist, - leaf_cnt); + leaf_eval_regression( + infogain, depth, tree_params.min_impurity_decrease, tree_params.max_depth, + tree_params.max_leaves, h_new_node_flags, sparsetree, sparsesize, + sparse_meanstate, n_nodes_nextitr, sparse_nodelist, leaf_cnt); MLCommon::updateDevice(d_new_node_flags, h_new_node_flags, n_nodes, tempmem->stream); - make_level_split(data, nrows, Ncols, ncols_sampled, nbins, n_nodes, - split_algo, d_split_colidx, d_split_binidx, - d_new_node_flags, flagsptr, tempmem); + make_level_split(data, nrows, Ncols, ncols_sampled, tree_params.n_bins, + n_nodes, tree_params.split_algo, d_split_colidx, + d_split_binidx, d_new_node_flags, flagsptr, tempmem); } // Start of gather algorithm @@ -199,8 +198,8 @@ void grow_deep_tree_regression( sparsetree.resize(sparsetree.size() - lastsize); convert_scatter_to_gather(flagsptr, 
sample_cnt, n_nodes, nrows, d_nodecount, d_nodestart, d_samplelist, tempmem); - for (int depth = tempmem->swap_depth; (depth < maxdepth) && (n_nodes != 0); - depth++) { + for (int depth = tempmem->swap_depth; + (depth < tree_params.max_depth) && (n_nodes != 0); depth++) { depth_cnt = depth + 1; //Algorithm starts here update_feature_sampling(h_colids, d_colids, h_colstart, d_colstart, Ncols, @@ -209,9 +208,9 @@ void grow_deep_tree_regression( best_split_gather_regression( data, labels, d_colids, d_colstart, d_nodestart, d_samplelist, nrows, - Ncols, ncols_sampled, nbins, n_nodes, split_algo, split_cr, - sparsetree.size() + lastsize, min_impurity_decrease, tempmem, - d_sparsenodes, d_nodelist); + Ncols, ncols_sampled, tree_params.n_bins, n_nodes, tree_params.split_algo, + tree_params.split_criterion, sparsetree.size() + lastsize, + tree_params.min_impurity_decrease, tempmem, d_sparsenodes, d_nodelist); MLCommon::updateHost(h_sparsenodes, d_sparsenodes, lastsize, tempmem->stream); diff --git a/cpp/src/decisiontree/memory.cuh b/cpp/src/decisiontree/memory.cuh index 0818f43962..ed80c6ec3b 100644 --- a/cpp/src/decisiontree/memory.cuh +++ b/cpp/src/decisiontree/memory.cuh @@ -15,11 +15,9 @@ */ #pragma once -// Changes for triggerring correct set of CI #include #include #include -#include #include "cub/cub.cuh" #include "memory.h" @@ -27,35 +25,26 @@ template TemporaryMemory::TemporaryMemory( const std::shared_ptr device_allocator_in, const std::shared_ptr host_allocator_in, - const cudaStream_t stream_in, int N, int Ncols, float colper, int n_unique, - int n_bins, const int split_algo, int depth, bool col_shuffle) { + const cudaStream_t stream_in, int N, int Ncols, int n_unique, + const ML::DecisionTree::DecisionTreeParams& tree_params) { stream = stream_in; - splitalgo = split_algo; - max_shared_mem = MLCommon::getSharedMemPerBlock(); num_sms = MLCommon::getMultiProcessorCount(); device_allocator = device_allocator_in; host_allocator = host_allocator_in; - 
LevelMemAllocator(N, Ncols, colper, n_unique, n_bins, depth, split_algo, - col_shuffle); + LevelMemAllocator(N, Ncols, n_unique, tree_params); } template -TemporaryMemory::TemporaryMemory(const ML::cumlHandle_impl& handle, - cudaStream_t stream_in, int N, int Ncols, - float colper, int n_unique, int n_bins, - const int split_algo, int depth, - bool col_shuffle) { - //Assign Stream from cumlHandle +TemporaryMemory::TemporaryMemory( + const ML::cumlHandle_impl& handle, cudaStream_t stream_in, int N, int Ncols, + int n_unique, const ML::DecisionTree::DecisionTreeParams& tree_params) { stream = stream_in; - splitalgo = split_algo; - max_shared_mem = MLCommon::getSharedMemPerBlock(); num_sms = MLCommon::getMultiProcessorCount(); device_allocator = handle.getDeviceAllocator(); host_allocator = handle.getHostAllocator(); - LevelMemAllocator(N, Ncols, colper, n_unique, n_bins, depth, split_algo, - col_shuffle); + LevelMemAllocator(N, Ncols, n_unique, tree_params); } template @@ -69,30 +58,31 @@ void TemporaryMemory::print_info(int depth, int nrows, int ncols, size_t maxnodes = max_nodes_per_level; size_t ncols_sampled = (size_t)(ncols * colper); - ML::PatternSetter _("%v"); - CUML_LOG_DEBUG("maxnodes --> %lu gather maxnodes--> %lu", maxnodes, - gather_max_nodes); - CUML_LOG_DEBUG("Parent size --> %lu", parentsz); - CUML_LOG_DEBUG("Child size --> %lu", childsz); - CUML_LOG_DEBUG("Nrows size --> %d", (nrows + 1)); - CUML_LOG_DEBUG("Sparse tree holder size --> %lu", 2 * gather_max_nodes); - CUML_LOG_DEBUG(" Total temporary memory usage--> %lf MB", - ((double)totalmem / (1024 * 1024))); + std::cout << "maxnodes --> " << maxnodes << " gather maxnodes--> " + << gather_max_nodes << std::endl; + std::cout << "Parent size --> " << parentsz << std::endl; + std::cout << "Child size --> " << childsz << std::endl; + std::cout << "Nrows size --> " << (nrows + 1) << std::endl; + std::cout << "Sparse tree holder size --> " << 2 * gather_max_nodes + << std::endl; + + std::cout << " Total 
temporary memory usage--> " + << ((double)totalmem / (1024 * 1024)) << " MB" << std::endl; } template -void TemporaryMemory::LevelMemAllocator(int nrows, int ncols, - float colper, int n_unique, - int nbins, int depth, - const int split_algo, - bool col_shuffle) { +void TemporaryMemory::LevelMemAllocator( + int nrows, int ncols, int n_unique, + const ML::DecisionTree::DecisionTreeParams& tree_params) { + int nbins = tree_params.n_bins; + int depth = tree_params.max_depth; if (depth > swap_depth || (depth == -1)) { max_nodes_per_level = pow(2, swap_depth); } else { max_nodes_per_level = pow(2, depth); } size_t maxnodes = max_nodes_per_level; - size_t ncols_sampled = (size_t)(ncols * colper); + size_t ncols_sampled = (size_t)(ncols * tree_params.max_features); if (depth < 64) { gather_max_nodes = std::min((size_t)(nrows + 1), (size_t)(pow((size_t)2, (size_t)depth) + 1)); @@ -135,7 +125,7 @@ void TemporaryMemory::LevelMemAllocator(int nrows, int ncols, totalmem = 3 * parentsz * sizeof(int) + childsz * sizeof(T) + (nrows + 1) * sizeof(T); - if (split_algo == 0) { + if (tree_params.split_algo == 0) { d_globalminmax = new MLCommon::device_buffer( device_allocator, stream, 2 * maxnodes * ncols_sampled); h_globalminmax = new MLCommon::host_buffer(host_allocator, stream, @@ -150,7 +140,7 @@ void TemporaryMemory::LevelMemAllocator(int nrows, int ncols, } d_sample_cnt = new MLCommon::device_buffer(device_allocator, stream, nrows); - if (col_shuffle == true) { + if (tree_params.shuffle_features == true) { d_colids = new MLCommon::device_buffer( device_allocator, stream, ncols_sampled * gather_max_nodes); h_colids = new MLCommon::host_buffer( @@ -252,7 +242,7 @@ void TemporaryMemory::LevelMemAllocator(int nrows, int ncols, max_nodes_pred /= 2; // For occupancy purposes. max_nodes_mse /= 2; // For occupancy purposes. 
} - if (split_algo == ML::SPLIT_ALGO::HIST) { + if (tree_params.split_algo == ML::SPLIT_ALGO::HIST) { size_t shmem_per_node = 2 * sizeof(T); max_nodes_minmax = max_shared_mem / shmem_per_node; max_nodes_minmax /= 2; diff --git a/cpp/src/decisiontree/memory.h b/cpp/src/decisiontree/memory.h index 511aee6ef2..e4423ea128 100644 --- a/cpp/src/decisiontree/memory.h +++ b/cpp/src/decisiontree/memory.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2019-2020, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,11 +15,11 @@ */ #pragma once -// Changes for triggerring correct set of CI #include #include #include #include +#include #include "common/cumlHandle.hpp" template @@ -69,8 +69,6 @@ struct TemporaryMemory { MLCommon::device_buffer *d_colstart = nullptr; MLCommon::host_buffer *h_colids = nullptr; MLCommon::host_buffer *h_colstart = nullptr; - //Split algo - int splitalgo; //For level algorithm MLCommon::device_buffer *d_flags = nullptr; @@ -111,17 +109,21 @@ struct TemporaryMemory { TemporaryMemory( const std::shared_ptr device_allocator_in, const std::shared_ptr host_allocator_in, - const cudaStream_t stream_in, int N, int Ncols, float colper, int n_unique, - int n_bins, const int split_algo, int depth, bool col_shuffle); + const cudaStream_t stream_in, int N, int Ncols, int n_unique, + const ML::DecisionTree::DecisionTreeParams &tree_params); + TemporaryMemory(const ML::cumlHandle_impl &handle, cudaStream_t stream_in, - int N, int Ncols, float colper, int n_unique, int n_bins, - const int split_algo, int depth, bool colshuffle); + int N, int Ncols, int n_unique, + const ML::DecisionTree::DecisionTreeParams &tree_params); + ~TemporaryMemory(); - void LevelMemAllocator(int nrows, int ncols, float colper, int n_unique, - int nbins, int depth, const int split_algo, - bool col_shuffle); + + void LevelMemAllocator( + int nrows, 
int ncols, int n_unique, + const ML::DecisionTree::DecisionTreeParams &tree_params); void LevelMemCleaner(); + void print_info(int depth, int nrows, int ncols, float colper); }; #include "memory.cuh" diff --git a/cpp/src/randomforest/randomforest_impl.cuh b/cpp/src/randomforest/randomforest_impl.cuh index a30d822c69..cc6197b5f5 100644 --- a/cpp/src/randomforest/randomforest_impl.cuh +++ b/cpp/src/randomforest/randomforest_impl.cuh @@ -16,7 +16,6 @@ #ifndef _OPENMP #define omp_get_thread_num() 0 #endif -#include #include "../decisiontree/memory.h" #include "../decisiontree/quantile/quantile.h" #include "random/permute.h" @@ -25,7 +24,6 @@ #include "score/scores.h" namespace ML { -// Changes for triggerring correct set of CI /** * @brief Construct rf (random forest) object. * @tparam T: data type for input data (float or double). @@ -189,12 +187,8 @@ void rfClassifier::fit(const cumlHandle& user_handle, const T* input, std::shared_ptr> tempmem[n_streams]; for (int i = 0; i < n_streams; i++) { tempmem[i] = std::make_shared>( - handle, handle.getInternalStream(i), n_rows, n_cols, - this->rf_params.tree_params.max_features, n_unique_labels, - this->rf_params.tree_params.n_bins, - this->rf_params.tree_params.split_algo, - this->rf_params.tree_params.max_depth, - this->rf_params.tree_params.shuffle_features); + handle, handle.getInternalStream(i), n_rows, n_cols, n_unique_labels, + this->rf_params.tree_params); } //Preprocess once only per forest if ((this->rf_params.tree_params.split_algo == SPLIT_ALGO::GLOBAL_QUANTILE) && @@ -257,14 +251,13 @@ void rfClassifier::fit(const cumlHandle& user_handle, const T* input, * @param[in] n_rows: number of data samples. * @param[in] n_cols: number of features (excluding target feature). * @param[in, out] predictions: n_rows predicted labels. GPU pointer, user allocated. - * @param[in] verbosity: verbosity level for logging messages during execution + * @param[in] verbose: flag for debugging purposes. 
*/ template void rfClassifier::predict(const cumlHandle& user_handle, const T* input, int n_rows, int n_cols, int* predictions, const RandomForestMetaData* forest, - int verbosity) const { - ML::Logger::get().setLevel(verbosity); + bool verbose) const { this->error_checking(input, predictions, n_rows, n_cols, true); std::vector h_predictions(n_rows); const cumlHandle_impl& handle = user_handle.getImpl(); @@ -276,14 +269,13 @@ void rfClassifier::predict(const cumlHandle& user_handle, const T* input, int row_size = n_cols; - ML::PatternSetter _("%v"); for (int row_id = 0; row_id < n_rows; row_id++) { - if (ML::Logger::get().shouldLogFor(CUML_LEVEL_DEBUG)) { - std::stringstream ss; - ss << "Predict for sample: "; + if (verbose) { + std::cout << "\n\n"; + std::cout << "Predict for sample: "; for (int i = 0; i < n_cols; i++) - ss << h_input[row_id * row_size + i] << ", "; - CUML_LOG_DEBUG(ss.str().c_str()); + std::cout << h_input[row_id * row_size + i] << ", "; + std::cout << std::endl; } std::map prediction_to_cnt; @@ -295,7 +287,7 @@ void rfClassifier::predict(const cumlHandle& user_handle, const T* input, int prediction; trees[i].predict(user_handle, &forest->trees[i], &h_input[row_id * row_size], 1, n_cols, &prediction, - verbosity); + verbose); ret = prediction_to_cnt.insert(std::pair(prediction, 1)); if (!(ret.second)) { ret.first->second += 1; @@ -321,15 +313,14 @@ void rfClassifier::predict(const cumlHandle& user_handle, const T* input, * @param[in] n_rows: number of data samples. * @param[in] n_cols: number of features (excluding target feature). * @param[in, out] predictions: n_rows predicted labels. GPU pointer, user allocated. - * @param[in] verbosity: verbosity level for logging messages during execution + * @param[in] verbose: flag for debugging purposes. 
*/ template void rfClassifier::predictGetAll(const cumlHandle& user_handle, const T* input, int n_rows, int n_cols, int* predictions, const RandomForestMetaData* forest, - int verbosity) { - ML::Logger::get().setLevel(verbosity); + bool verbose) { int num_trees = this->rf_params.n_trees; std::vector h_predictions(n_rows * num_trees); @@ -343,19 +334,19 @@ void rfClassifier::predictGetAll(const cumlHandle& user_handle, int pred_id = 0; for (int row_id = 0; row_id < n_rows; row_id++) { - if (ML::Logger::get().shouldLogFor(CUML_LEVEL_DEBUG)) { - std::stringstream ss; - ss << "Predict for sample: "; + if (verbose) { + std::cout << "\n\n"; + std::cout << "Predict for sample: "; for (int i = 0; i < n_cols; i++) - ss << h_input[row_id * row_size + i] << ", "; - CUML_LOG_DEBUG(ss.str().c_str()); + std::cout << h_input[row_id * row_size + i] << ", "; + std::cout << std::endl; } for (int i = 0; i < num_trees; i++) { int prediction; trees[i].predict(user_handle, &forest->trees[i], &h_input[row_id * row_size], 1, n_cols, &prediction, - verbosity); + verbose); h_predictions[pred_id] = prediction; pred_id++; } @@ -375,19 +366,18 @@ void rfClassifier::predictGetAll(const cumlHandle& user_handle, * @param[in] n_rows: number of data samples. * @param[in] n_cols: number of features (excluding target feature). * @param[in] predictions: n_rows predicted labels. GPU pointer, user allocated. - * @param[in] verbosity: verbosity level for logging messages during execution + * @param[in] verbose: flag for debugging purposes. 
*/ template RF_metrics rfClassifier::score(const cumlHandle& user_handle, const int* ref_labels, int n_rows, - const int* predictions, int verbosity) { - ML::Logger::get().setLevel(verbosity); + const int* predictions, bool verbose) { cudaStream_t stream = user_handle.getImpl().getStream(); auto d_alloc = user_handle.getDeviceAllocator(); float accuracy = MLCommon::Score::accuracy_score(predictions, ref_labels, n_rows, d_alloc, stream); RF_metrics stats = set_rf_metrics_classification(accuracy); - if (ML::Logger::get().shouldLogFor(CUML_LEVEL_DEBUG)) print(stats); + if (verbose) print(stats); /* TODO: Potentially augment RF_metrics w/ more metrics (e.g., precision, F1, etc.). For non binary classification problems (i.e., one target and > 2 labels), need avg. @@ -461,12 +451,8 @@ void rfRegressor::fit(const cumlHandle& user_handle, const T* input, std::shared_ptr> tempmem[n_streams]; for (int i = 0; i < n_streams; i++) { tempmem[i] = std::make_shared>( - handle, handle.getInternalStream(i), n_rows, n_cols, - this->rf_params.tree_params.max_features, 1, - this->rf_params.tree_params.n_bins, - this->rf_params.tree_params.split_algo, - this->rf_params.tree_params.max_depth, - this->rf_params.tree_params.shuffle_features); + handle, handle.getInternalStream(i), n_rows, n_cols, 1, + this->rf_params.tree_params); } //Preprocess once only per forest if ((this->rf_params.tree_params.split_algo == SPLIT_ALGO::GLOBAL_QUANTILE) && @@ -527,13 +513,13 @@ void rfRegressor::fit(const cumlHandle& user_handle, const T* input, * @param[in] n_cols: number of features (excluding target feature). * @param[in, out] predictions: n_rows predicted labels. GPU pointer, user allocated. * @param[in] forest: CPU pointer to RandomForestMetaData struct - * @param[in] verbosity: verbosity level for logging messages during execution + * @param[in] verbose: flag for debugging purposes. 
*/ template void rfRegressor::predict(const cumlHandle& user_handle, const T* input, int n_rows, int n_cols, T* predictions, const RandomForestMetaData* forest, - int verbosity) const { + bool verbose) const { this->error_checking(input, predictions, n_rows, n_cols, true); std::vector h_predictions(n_rows); @@ -547,12 +533,12 @@ void rfRegressor::predict(const cumlHandle& user_handle, const T* input, int row_size = n_cols; for (int row_id = 0; row_id < n_rows; row_id++) { - if (ML::Logger::get().shouldLogFor(CUML_LEVEL_DEBUG)) { - std::stringstream ss; - ss << "Predict for sample: "; + if (verbose) { + std::cout << "\n\n"; + std::cout << "Predict for sample: "; for (int i = 0; i < n_cols; i++) - ss << h_input[row_id * row_size + i] << ", "; - CUML_LOG_DEBUG(ss.str().c_str()); + std::cout << h_input[row_id * row_size + i] << ", "; + std::cout << std::endl; } T sum_predictions = 0; @@ -561,7 +547,7 @@ void rfRegressor::predict(const cumlHandle& user_handle, const T* input, T prediction; trees[i].predict(user_handle, &forest->trees[i], &h_input[row_id * row_size], 1, n_cols, &prediction, - verbosity); + verbose); sum_predictions += prediction; } // Random forest's prediction is the arithmetic mean of all its decision tree predictions. @@ -582,13 +568,12 @@ void rfRegressor::predict(const cumlHandle& user_handle, const T* input, * @param[in] n_cols: number of features (excluding target feature). * @param[in] predictions: n_rows predicted labels. GPU pointer, user allocated. * @param[in] forest: CPU pointer to RandomForestMetaData struct - * @param[in] verbosity: verbosity level for logging messages during execution + * @param[in] verbose: flag for debugging purposes. 
*/ template RF_metrics rfRegressor::score(const cumlHandle& user_handle, const T* ref_labels, int n_rows, - const T* predictions, int verbosity) { - ML::Logger::get().setLevel(verbosity); + const T* predictions, bool verbose) { cudaStream_t stream = user_handle.getImpl().getStream(); auto d_alloc = user_handle.getDeviceAllocator(); @@ -598,7 +583,7 @@ RF_metrics rfRegressor::score(const cumlHandle& user_handle, mean_squared_error, median_abs_error); RF_metrics stats = set_rf_metrics_regression( mean_abs_error, mean_squared_error, median_abs_error); - if (ML::Logger::get().shouldLogFor(CUML_LEVEL_DEBUG)) print(stats); + if (verbose) print(stats); return stats; } From d087e7dcf7a599894a366d92ac245049ed11189e Mon Sep 17 00:00:00 2001 From: Vinay D Date: Mon, 20 Apr 2020 07:57:28 +0530 Subject: [PATCH 241/330] Reverting to correct 3266937 --- cpp/include/cuml/tree/decisiontree.hpp | 131 ++++++++++++++++-- cpp/src/decisiontree/decisiontree_impl.cuh | 75 +++++----- cpp/src/decisiontree/decisiontree_impl.h | 10 +- .../levelalgo/levelfunc_classifier.cuh | 4 +- cpp/src/decisiontree/memory.cuh | 20 +-- cpp/src/randomforest/randomforest_impl.cuh | 66 +++++---- 6 files changed, 210 insertions(+), 96 deletions(-) diff --git a/cpp/include/cuml/tree/decisiontree.hpp b/cpp/include/cuml/tree/decisiontree.hpp index 7c9db1b678..d72a3e2040 100644 --- a/cpp/include/cuml/tree/decisiontree.hpp +++ b/cpp/include/cuml/tree/decisiontree.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2019-2020, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -71,6 +71,20 @@ struct DecisionTreeParams { float min_impurity_decrease = 0.0f; }; +/** + * @brief Set all DecisionTreeParams members. 
+ * @param[in,out] params: update with tree parameters + * @param[in] cfg_max_depth: maximum tree depth; default -1 + * @param[in] cfg_max_leaves: maximum leaves; default -1 + * @param[in] cfg_max_features: maximum number of features; default 1.0f + * @param[in] cfg_n_bins: number of bins; default 8 + * @param[in] cfg_split_algo: split algorithm; default SPLIT_ALGO::HIST + * @param[in] cfg_min_rows_per_node: min. rows per node; default 2 + * @param[in] cfg_bootstrap_features: bootstrapping for features; default false + * @param[in] cfg_split_criterion: split criterion; default CRITERION_END, + * i.e., GINI for classification or MSE for regression + * @param[in] cfg_quantile_per_tree: compute quantile per tree; default false + */ void set_tree_params(DecisionTreeParams &params, int cfg_max_depth = -1, int cfg_max_leaves = -1, float cfg_max_features = 1.0f, int cfg_n_bins = 8, int cfg_split_algo = SPLIT_ALGO::HIST, @@ -80,7 +94,17 @@ void set_tree_params(DecisionTreeParams &params, int cfg_max_depth = -1, CRITERION cfg_split_criterion = CRITERION_END, bool cfg_quantile_per_tree = false, bool cfg_shuffle_features = false); + +/** + * @brief Check validity of all decision tree hyper-parameters. + * @param[in] params: decision tree hyper-parameters. + */ void validity_check(const DecisionTreeParams params); + +/** + * @brief Print all decision tree hyper-parameters. + * @param[in] params: decision tree hyper-parameters. + */ void print(const DecisionTreeParams params); template @@ -93,9 +117,21 @@ struct TreeMetaDataNode { std::vector> sparsetree; }; +/** + * @brief Print high-level tree information. + * @tparam T: data type for input data (float or double).
+ * @tparam L: data type for labels (int type for classification, T type for regression). + * @param[in] tree: CPU pointer to TreeMetaDataNode + */ template void print_tree(const TreeMetaDataNode *tree); @@ -104,59 +140,136 @@ void print_tree(const TreeMetaDataNode *tree); typedef TreeMetaDataNode TreeClassifierF; typedef TreeMetaDataNode TreeClassifierD; +/** + * @defgroup Decision Tree Classifier - Fit function + * @brief Build (i.e., fit, train) Decision Tree classifier for input data. + * @param[in] handle: cumlHandle + * @param[in, out] tree: CPU pointer to TreeMetaDataNode. User allocated. + * @param[in] data: train data (nrows samples, ncols features) in column major format, + * excluding labels. Device pointer. + * @param[in] ncols: number of features (i.e., columns) excluding target feature. + * @param[in] nrows: number of training data samples of the whole unsampled dataset. + * @param[in] labels: 1D array of target features (int only). One label per training + * sample. Device pointer. + * Assumption: labels need to be preprocessed to map to ascending numbers from 0; + * needed for current gini impl. in decision tree. + * @param[in,out] rowids: array of n_sampled_rows integers in [0, nrows) range. + * Device pointer. The same array is then rearranged when splits are made, + * allowing us to construct trees without rearranging the actual dataset. + * @param[in] n_sampled_rows: number of training samples, after sampling. + * If using decision tree directly over the whole dataset: n_sampled_rows = nrows + * @param[in] n_unique_labels: #unique label values. Number of categories of classification. + * @param[in] tree_params: Decision Tree training hyper parameter struct. 
+ * @{ + */ void decisionTreeClassifierFit(const ML::cumlHandle &handle, TreeClassifierF *&tree, float *data, const int ncols, const int nrows, int *labels, unsigned int *rowids, const int n_sampled_rows, int unique_labels, DecisionTree::DecisionTreeParams tree_params); - void decisionTreeClassifierFit(const ML::cumlHandle &handle, TreeClassifierD *&tree, double *data, const int ncols, const int nrows, int *labels, unsigned int *rowids, const int n_sampled_rows, int unique_labels, DecisionTree::DecisionTreeParams tree_params); +/** @} */ +/** + * @defgroup Decision Tree Classifier - Predict function + * @brief Predict target feature for input data; n-ary classification for + * single feature supported. Inference of trees is CPU only for now. + * @param[in] handle: cumlHandle (currently unused; API placeholder) + * @param[in] tree: CPU pointer to TreeMetaDataNode. + * @param[in] rows: test data (n_rows samples, n_cols features) in row major format. + * Current impl. expects a CPU pointer. TODO future API change. + * @param[in] n_rows: number of data samples. + * @param[in] n_cols: number of features (excluding target feature). + * @param[in,out] predictions: n_rows predicted labels. Current impl. expects a + * CPU pointer, user allocated. TODO future API change. + * @param[in] verbosity: verbosity level for logging messages during execution. + * A negative value means to not perform an explicit + * `setLevel()` call, but to continue with the level that + * the caller itself might have set. 
+ * @{ + */ void decisionTreeClassifierPredict(const ML::cumlHandle &handle, const TreeClassifierF *tree, const float *rows, const int n_rows, const int n_cols, int *predictions, - bool verbose = false); - + int verbosity = -1); void decisionTreeClassifierPredict(const ML::cumlHandle &handle, const TreeClassifierD *tree, const double *rows, const int n_rows, const int n_cols, int *predictions, - bool verbose = false); + int verbosity = -1); +/** @} */ // ----------------------------- Regression ----------------------------------- // typedef TreeMetaDataNode TreeRegressorF; typedef TreeMetaDataNode TreeRegressorD; +/** + * @defgroup Decision Tree Regressor - Fit function + * @brief Build (i.e., fit, train) Decision Tree regressor for input data. + * @param[in] handle: cumlHandle + * @param[in, out] tree: CPU pointer to TreeMetaDataNode. User allocated. + * @param[in] data: train data (nrows samples, ncols features) in column major format, + * excluding labels. Device pointer. + * @param[in] ncols: number of features (i.e., columns) excluding target feature. + * @param[in] nrows: number of training data samples of the whole unsampled dataset. + * @param[in] labels: 1D array of target features (float or double). One label per + * training sample. Device pointer. + * @param[in,out] rowids: array of n_sampled_rows integers in [0, nrows) range. + * Device pointer. The same array is then rearranged when splits are made, + * allowing us to construct trees without rearranging the actual dataset. + * @param[in] n_sampled_rows: number of training samples, after sampling. If using decision + * tree directly over the whole dataset: n_sampled_rows = nrows + * @param[in] tree_params: Decision Tree training hyper parameter struct. 
+ * @{ + */ void decisionTreeRegressorFit(const ML::cumlHandle &handle, TreeRegressorF *&tree, float *data, const int ncols, const int nrows, float *labels, unsigned int *rowids, const int n_sampled_rows, DecisionTree::DecisionTreeParams tree_params); - void decisionTreeRegressorFit(const ML::cumlHandle &handle, TreeRegressorD *&tree, double *data, const int ncols, const int nrows, double *labels, unsigned int *rowids, const int n_sampled_rows, DecisionTree::DecisionTreeParams tree_params); +/** @} */ +/** + * @defgroup Decision Tree Regressor - Predict function + * @brief Predict target feature for input data; regression for single feature supported. + * Inference of trees is CPU only for now. + * @param[in] handle: cumlHandle (currently unused; API placeholder) + * @param[in] tree: CPU pointer to TreeMetaDataNode. + * @param[in] rows: test data (n_rows samples, n_cols features) in row major format. + * Current impl. expects a CPU pointer. TODO future API change. + * @param[in] n_rows: number of data samples. + * @param[in] n_cols: number of features (excluding target feature). + * @param[in,out] predictions: n_rows predicted labels. Current impl. expects a CPU + * pointer, user allocated. TODO future API change. + * @param[in] verbosity: verbosity level for logging messages during execution. + * A negative value means to not perform an explicit + * `setLevel()` call, but to continue with the level that + * the caller itself might have set. 
+ * @{ + */ void decisionTreeRegressorPredict(const ML::cumlHandle &handle, const TreeRegressorF *tree, const float *rows, const int n_rows, const int n_cols, - float *predictions, bool verbose = false); - + float *predictions, int verbosity = -1); void decisionTreeRegressorPredict(const ML::cumlHandle &handle, const TreeRegressorD *tree, const double *rows, const int n_rows, const int n_cols, double *predictions, - bool verbose = false); + int verbosity = -1); +/** @} */ } // End namespace DecisionTree } //End namespace ML diff --git a/cpp/src/decisiontree/decisiontree_impl.cuh b/cpp/src/decisiontree/decisiontree_impl.cuh index 81422f9fcd..b99ee555ab 100644 --- a/cpp/src/decisiontree/decisiontree_impl.cuh +++ b/cpp/src/decisiontree/decisiontree_impl.cuh @@ -15,6 +15,7 @@ */ #include +#include #include #include #include @@ -56,12 +57,13 @@ void print_node(const std::string &prefix, const std::vector> &sparsetree, int idx, bool isLeft) { const SparseTreeNode &node = sparsetree[idx]; - std::cout << prefix; - - std::cout << (isLeft ? "├" : "└"); + CUML_LOG_DEBUG(prefix.c_str()); + CUML_LOG_DEBUG(isLeft ? 
"├" : "└"); // print the value of the node - std::cout << node << std::endl; + std::stringstream ss; + ss << node << std::endl; + CUML_LOG_DEBUG(ss.str().c_str()); if ((node.colid != -1)) { // enter the next tree level - left and right branch @@ -163,18 +165,16 @@ void build_treelite_tree(TreeBuilderHandle tree_builder, */ template void DecisionTreeBase::print_tree_summary() const { - std::cout << " Decision Tree depth --> " << depth_counter - << " and n_leaves --> " << leaf_counter << std::endl; - std::cout << " Total temporary memory usage--> " - << ((double)total_temp_mem / (1024 * 1024)) << " MB" << std::endl; - std::cout << " Shared memory used --> " << shmem_used << " bytes " - << std::endl; - std::cout << " Tree Fitting - Overall time --> " << prepare_time + train_time - << " seconds" << std::endl; - std::cout << " - preparing for fit time: " << prepare_time << " seconds" - << std::endl; - std::cout << " - tree growing time: " << train_time << " seconds" - << std::endl; + PatternSetter _("%v"); + CUML_LOG_DEBUG(" Decision Tree depth --> %d and n_leaves --> %d", + depth_counter, leaf_counter); + CUML_LOG_DEBUG(" Total temporary memory usage--> %lf MB", + ((double)total_temp_mem / (1024 * 1024))); + CUML_LOG_DEBUG(" Shared memory used --> %d B", shmem_used); + CUML_LOG_DEBUG(" Tree Fitting - Overall time --> %lf s", + prepare_time + train_time); + CUML_LOG_DEBUG(" - preparing for fit time: %lf s", prepare_time); + CUML_LOG_DEBUG(" - tree growing time: %lf s", train_time); } /** @@ -247,7 +247,10 @@ void DecisionTreeBase::predict(const ML::cumlHandle &handle, const TreeMetaDataNode *tree, const T *rows, const int n_rows, const int n_cols, L *predictions, - bool verbose) const { + int verbosity) const { + if (verbosity >= 0) { + ML::Logger::get().setLevel(verbosity); + } ASSERT(!is_dev_ptr(rows) && !is_dev_ptr(predictions), "DT Error: Current impl. 
expects both input and predictions to be CPU " "pointers.\n"); @@ -258,45 +261,37 @@ void DecisionTreeBase::predict(const ML::cumlHandle &handle, ASSERT((n_rows > 0), "Invalid n_rows %d", n_rows); ASSERT((n_cols > 0), "Invalid n_cols %d", n_cols); - predict_all(tree, rows, n_rows, n_cols, predictions, verbose); + predict_all(tree, rows, n_rows, n_cols, predictions); } template void DecisionTreeBase::predict_all(const TreeMetaDataNode *tree, const T *rows, const int n_rows, - const int n_cols, L *preds, - bool verbose) const { + const int n_cols, L *preds) const { for (int row_id = 0; row_id < n_rows; row_id++) { - preds[row_id] = - predict_one(&rows[row_id * n_cols], tree->sparsetree, 0, verbose); + preds[row_id] = predict_one(&rows[row_id * n_cols], tree->sparsetree, 0); } } template L DecisionTreeBase::predict_one( - const T *row, const std::vector> sparsetree, int idx, - bool verbose) const { + const T *row, const std::vector> sparsetree, + int idx) const { int colid = sparsetree[idx].colid; T quesval = sparsetree[idx].quesval; int leftchild = sparsetree[idx].left_child_id; if (colid == -1) { - if (verbose) { - std::cout << "Leaf node. Predicting " << sparsetree[idx].prediction - << std::endl; - } + CUML_LOG_DEBUG("Leaf node. 
Predicting %f", + (float)sparsetree[idx].prediction); return sparsetree[idx].prediction; } else if (row[colid] <= quesval) { - if (verbose) { - std::cout << "Classifying Left @ node w/ column " << colid - << " and value " << quesval << std::endl; - } - return predict_one(row, sparsetree, leftchild, verbose); + CUML_LOG_DEBUG("Classifying Left @ node w/ column %d and value %f", colid, + (float)quesval); + return predict_one(row, sparsetree, leftchild); } else { - if (verbose) { - std::cout << "Classifying Right @ node w/ column " << colid - << " and value " << quesval << std::endl; - } - return predict_one(row, sparsetree, leftchild + 1, verbose); + CUML_LOG_DEBUG("Classifying Right @ node w/ column %d and value %f", colid, + (float)quesval); + return predict_one(row, sparsetree, leftchild + 1); } } @@ -326,8 +321,8 @@ void DecisionTreeBase::base_fit( validity_check(tree_params); if (tree_params.n_bins > n_sampled_rows) { - std::cout << "Warning! Calling with number of bins > number of rows! "; - std::cout << "Resetting n_bins to " << n_sampled_rows << "." << std::endl; + CUML_LOG_WARN("Calling with number of bins > number of rows!"); + CUML_LOG_WARN("Resetting n_bins to %d.", n_sampled_rows); tree_params.n_bins = n_sampled_rows; } diff --git a/cpp/src/decisiontree/decisiontree_impl.h b/cpp/src/decisiontree/decisiontree_impl.h index 576a1951c5..b0ec67c798 100644 --- a/cpp/src/decisiontree/decisiontree_impl.h +++ b/cpp/src/decisiontree/decisiontree_impl.h @@ -1,4 +1,3 @@ - /* * Copyright (c) 2019-2020, NVIDIA CORPORATION. * @@ -108,13 +107,12 @@ class DecisionTreeBase { // Predict labels for n_rows rows, with n_cols features each, for a given tree. rows in row-major format. 
void predict(const ML::cumlHandle &handle, const TreeMetaDataNode *tree, const T *rows, const int n_rows, const int n_cols, - L *predictions, bool verbose = false) const; + L *predictions, int verbosity = -1) const; void predict_all(const TreeMetaDataNode *tree, const T *rows, - const int n_rows, const int n_cols, L *preds, - bool verbose = false) const; + const int n_rows, const int n_cols, L *preds) const; L predict_one(const T *row, - const std::vector> sparsetree, int idx, - bool verbose = false) const; + const std::vector> sparsetree, + int idx) const; void set_metadata(TreeMetaDataNode *&tree); diff --git a/cpp/src/decisiontree/levelalgo/levelfunc_classifier.cuh b/cpp/src/decisiontree/levelalgo/levelfunc_classifier.cuh index 27634ae4dc..b13ecf4bfe 100644 --- a/cpp/src/decisiontree/levelalgo/levelfunc_classifier.cuh +++ b/cpp/src/decisiontree/levelalgo/levelfunc_classifier.cuh @@ -14,7 +14,9 @@ * limitations under the License. */ #pragma once + #include +#include #include #include #include @@ -160,7 +162,7 @@ void grow_deep_tree_classification( } // Start of gather algorithm //Convertor - //std::cout << "begin gather \n"; + CUML_LOG_DEBUG("begin gather "); int lastsize = sparsetree.size() - sparsesize_nextitr; n_nodes = n_nodes_nextitr; if (n_nodes == 0) return; diff --git a/cpp/src/decisiontree/memory.cuh b/cpp/src/decisiontree/memory.cuh index ed80c6ec3b..8bb69f285e 100644 --- a/cpp/src/decisiontree/memory.cuh +++ b/cpp/src/decisiontree/memory.cuh @@ -18,6 +18,7 @@ #include #include #include +#include #include "cub/cub.cuh" #include "memory.h" @@ -58,16 +59,15 @@ void TemporaryMemory::print_info(int depth, int nrows, int ncols, size_t maxnodes = max_nodes_per_level; size_t ncols_sampled = (size_t)(ncols * colper); - std::cout << "maxnodes --> " << maxnodes << " gather maxnodes--> " - << gather_max_nodes << std::endl; - std::cout << "Parent size --> " << parentsz << std::endl; - std::cout << "Child size --> " << childsz << std::endl; - std::cout << "Nrows 
size --> " << (nrows + 1) << std::endl; - std::cout << "Sparse tree holder size --> " << 2 * gather_max_nodes - << std::endl; - - std::cout << " Total temporary memory usage--> " - << ((double)totalmem / (1024 * 1024)) << " MB" << std::endl; + ML::PatternSetter _("%v"); + CUML_LOG_DEBUG("maxnodes --> %lu gather maxnodes--> %lu", maxnodes, + gather_max_nodes); + CUML_LOG_DEBUG("Parent size --> %lu", parentsz); + CUML_LOG_DEBUG("Child size --> %lu", childsz); + CUML_LOG_DEBUG("Nrows size --> %d", (nrows + 1)); + CUML_LOG_DEBUG("Sparse tree holder size --> %lu", 2 * gather_max_nodes); + CUML_LOG_DEBUG(" Total temporary memory usage--> %lf MB", + ((double)totalmem / (1024 * 1024))); } template diff --git a/cpp/src/randomforest/randomforest_impl.cuh b/cpp/src/randomforest/randomforest_impl.cuh index cc6197b5f5..f5add7f02f 100644 --- a/cpp/src/randomforest/randomforest_impl.cuh +++ b/cpp/src/randomforest/randomforest_impl.cuh @@ -16,6 +16,7 @@ #ifndef _OPENMP #define omp_get_thread_num() 0 #endif +#include #include "../decisiontree/memory.h" #include "../decisiontree/quantile/quantile.h" #include "random/permute.h" @@ -251,13 +252,14 @@ void rfClassifier::fit(const cumlHandle& user_handle, const T* input, * @param[in] n_rows: number of data samples. * @param[in] n_cols: number of features (excluding target feature). * @param[in, out] predictions: n_rows predicted labels. GPU pointer, user allocated. - * @param[in] verbose: flag for debugging purposes. 
+ * @param[in] verbosity: verbosity level for logging messages during execution */ template void rfClassifier::predict(const cumlHandle& user_handle, const T* input, int n_rows, int n_cols, int* predictions, const RandomForestMetaData* forest, - bool verbose) const { + int verbosity) const { + ML::Logger::get().setLevel(verbosity); this->error_checking(input, predictions, n_rows, n_cols, true); std::vector h_predictions(n_rows); const cumlHandle_impl& handle = user_handle.getImpl(); @@ -269,13 +271,14 @@ void rfClassifier::predict(const cumlHandle& user_handle, const T* input, int row_size = n_cols; + ML::PatternSetter _("%v"); for (int row_id = 0; row_id < n_rows; row_id++) { - if (verbose) { - std::cout << "\n\n"; - std::cout << "Predict for sample: "; + if (ML::Logger::get().shouldLogFor(CUML_LEVEL_DEBUG)) { + std::stringstream ss; + ss << "Predict for sample: "; for (int i = 0; i < n_cols; i++) - std::cout << h_input[row_id * row_size + i] << ", "; - std::cout << std::endl; + ss << h_input[row_id * row_size + i] << ", "; + CUML_LOG_DEBUG(ss.str().c_str()); } std::map prediction_to_cnt; @@ -287,7 +290,7 @@ void rfClassifier::predict(const cumlHandle& user_handle, const T* input, int prediction; trees[i].predict(user_handle, &forest->trees[i], &h_input[row_id * row_size], 1, n_cols, &prediction, - verbose); + verbosity); ret = prediction_to_cnt.insert(std::pair(prediction, 1)); if (!(ret.second)) { ret.first->second += 1; @@ -313,14 +316,15 @@ void rfClassifier::predict(const cumlHandle& user_handle, const T* input, * @param[in] n_rows: number of data samples. * @param[in] n_cols: number of features (excluding target feature). * @param[in, out] predictions: n_rows predicted labels. GPU pointer, user allocated. - * @param[in] verbose: flag for debugging purposes. 
+ * @param[in] verbosity: verbosity level for logging messages during execution */ template void rfClassifier::predictGetAll(const cumlHandle& user_handle, const T* input, int n_rows, int n_cols, int* predictions, const RandomForestMetaData* forest, - bool verbose) { + int verbosity) { + ML::Logger::get().setLevel(verbosity); int num_trees = this->rf_params.n_trees; std::vector h_predictions(n_rows * num_trees); @@ -334,19 +338,19 @@ void rfClassifier::predictGetAll(const cumlHandle& user_handle, int pred_id = 0; for (int row_id = 0; row_id < n_rows; row_id++) { - if (verbose) { - std::cout << "\n\n"; - std::cout << "Predict for sample: "; + if (ML::Logger::get().shouldLogFor(CUML_LEVEL_DEBUG)) { + std::stringstream ss; + ss << "Predict for sample: "; for (int i = 0; i < n_cols; i++) - std::cout << h_input[row_id * row_size + i] << ", "; - std::cout << std::endl; + ss << h_input[row_id * row_size + i] << ", "; + CUML_LOG_DEBUG(ss.str().c_str()); } for (int i = 0; i < num_trees; i++) { int prediction; trees[i].predict(user_handle, &forest->trees[i], &h_input[row_id * row_size], 1, n_cols, &prediction, - verbose); + verbosity); h_predictions[pred_id] = prediction; pred_id++; } @@ -366,18 +370,19 @@ void rfClassifier::predictGetAll(const cumlHandle& user_handle, * @param[in] n_rows: number of data samples. * @param[in] n_cols: number of features (excluding target feature). * @param[in] predictions: n_rows predicted labels. GPU pointer, user allocated. - * @param[in] verbose: flag for debugging purposes. 
+ * @param[in] verbosity: verbosity level for logging messages during execution */ template RF_metrics rfClassifier::score(const cumlHandle& user_handle, const int* ref_labels, int n_rows, - const int* predictions, bool verbose) { + const int* predictions, int verbosity) { + ML::Logger::get().setLevel(verbosity); cudaStream_t stream = user_handle.getImpl().getStream(); auto d_alloc = user_handle.getDeviceAllocator(); float accuracy = MLCommon::Score::accuracy_score(predictions, ref_labels, n_rows, d_alloc, stream); RF_metrics stats = set_rf_metrics_classification(accuracy); - if (verbose) print(stats); + if (ML::Logger::get().shouldLogFor(CUML_LEVEL_DEBUG)) print(stats); /* TODO: Potentially augment RF_metrics w/ more metrics (e.g., precision, F1, etc.). For non binary classification problems (i.e., one target and > 2 labels), need avg. @@ -513,13 +518,13 @@ void rfRegressor::fit(const cumlHandle& user_handle, const T* input, * @param[in] n_cols: number of features (excluding target feature). * @param[in, out] predictions: n_rows predicted labels. GPU pointer, user allocated. * @param[in] forest: CPU pointer to RandomForestMetaData struct - * @param[in] verbose: flag for debugging purposes. 
+ * @param[in] verbosity: verbosity level for logging messages during execution */ template void rfRegressor::predict(const cumlHandle& user_handle, const T* input, int n_rows, int n_cols, T* predictions, const RandomForestMetaData* forest, - bool verbose) const { + int verbosity) const { this->error_checking(input, predictions, n_rows, n_cols, true); std::vector h_predictions(n_rows); @@ -533,12 +538,12 @@ void rfRegressor::predict(const cumlHandle& user_handle, const T* input, int row_size = n_cols; for (int row_id = 0; row_id < n_rows; row_id++) { - if (verbose) { - std::cout << "\n\n"; - std::cout << "Predict for sample: "; + if (ML::Logger::get().shouldLogFor(CUML_LEVEL_DEBUG)) { + std::stringstream ss; + ss << "Predict for sample: "; for (int i = 0; i < n_cols; i++) - std::cout << h_input[row_id * row_size + i] << ", "; - std::cout << std::endl; + ss << h_input[row_id * row_size + i] << ", "; + CUML_LOG_DEBUG(ss.str().c_str()); } T sum_predictions = 0; @@ -547,7 +552,7 @@ void rfRegressor::predict(const cumlHandle& user_handle, const T* input, T prediction; trees[i].predict(user_handle, &forest->trees[i], &h_input[row_id * row_size], 1, n_cols, &prediction, - verbose); + verbosity); sum_predictions += prediction; } // Random forest's prediction is the arithmetic mean of all its decision tree predictions. @@ -568,12 +573,13 @@ void rfRegressor::predict(const cumlHandle& user_handle, const T* input, * @param[in] n_cols: number of features (excluding target feature). * @param[in] predictions: n_rows predicted labels. GPU pointer, user allocated. * @param[in] forest: CPU pointer to RandomForestMetaData struct - * @param[in] verbose: flag for debugging purposes. 
+ * @param[in] verbosity: verbosity level for logging messages during execution */ template RF_metrics rfRegressor::score(const cumlHandle& user_handle, const T* ref_labels, int n_rows, - const T* predictions, bool verbose) { + const T* predictions, int verbosity) { + ML::Logger::get().setLevel(verbosity); cudaStream_t stream = user_handle.getImpl().getStream(); auto d_alloc = user_handle.getDeviceAllocator(); @@ -583,7 +589,7 @@ RF_metrics rfRegressor::score(const cumlHandle& user_handle, mean_squared_error, median_abs_error); RF_metrics stats = set_rf_metrics_regression( mean_abs_error, mean_squared_error, median_abs_error); - if (verbose) print(stats); + if (ML::Logger::get().shouldLogFor(CUML_LEVEL_DEBUG)) print(stats); return stats; } From 12e444f8160c2fb46b37ec1b06102fedfd7d7893 Mon Sep 17 00:00:00 2001 From: wxbn Date: Mon, 20 Apr 2020 15:00:20 +0000 Subject: [PATCH 242/330] Fix PCA transform --- python/cuml/decomposition/pca.pyx | 2 +- python/cuml/test/test_pca.py | 12 ++++++++++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/python/cuml/decomposition/pca.pyx b/python/cuml/decomposition/pca.pyx index 13845f9295..7c0e38738c 100644 --- a/python/cuml/decomposition/pca.pyx +++ b/python/cuml/decomposition/pca.pyx @@ -607,7 +607,7 @@ class PCA(Base): # following transfers start self.handle.sync() - return X_m.to_output(out_type) + return t_input_data.to_output(out_type) def get_param_names(self): return ["copy", "iterated_power", "n_components", "svd_solver", "tol", diff --git a/python/cuml/test/test_pca.py b/python/cuml/test/test_pca.py index 5cdca3af73..5d554714b1 100644 --- a/python/cuml/test/test_pca.py +++ b/python/cuml/test/test_pca.py @@ -93,14 +93,19 @@ def test_pca_fit_then_transform(datatype, input_type, if name != 'blobs': skpca = skPCA(n_components=2) - Xskpca = skpca.fit_transform(X) + skpca.fit(X) + Xskpca = skpca.transform(X) handle, stream = get_handle(use_handle) cupca = cuPCA(n_components=2, handle=handle) - X_cupca = 
cupca.fit_transform(X) + cupca.fit(X) + X_cupca = cupca.transform(X) cupca.handle.sync() + assert Xskpca.shape[0] == X_cupca.shape[0] + assert Xskpca.shape[1] == X_cupca.shape[1] + if name != 'blobs': assert array_equal(X_cupca, Xskpca, 1e-3, with_sign=True) @@ -138,6 +143,9 @@ def test_pca_fit_transform(datatype, input_type, X_cupca = cupca.fit_transform(X) cupca.handle.sync() + assert Xskpca.shape[0] == X_cupca.shape[0] + assert Xskpca.shape[1] == X_cupca.shape[1] + if name != 'blobs': assert array_equal(X_cupca, Xskpca, 1e-3, with_sign=True) From efe2a92ec0132d6f165f5fbe8dcab71f44696616 Mon Sep 17 00:00:00 2001 From: wxbn Date: Mon, 20 Apr 2020 15:04:31 +0000 Subject: [PATCH 243/330] Changelog update --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3b8d0b6667..3a58d6bc0f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -46,6 +46,7 @@ - PR #2078: Ignore negative cache indices in get_vecs - PR #2084: Fixed cuda-memcheck errors with COO unit-tests - PR #2087: Fixed cuda-memcheck errors with dispersion prim +- PR #2107: Fix PCA transform # cuML 0.13.0 (Date TBD) From dad06540e5b00f6bbe3e39cf03946f61e426cea0 Mon Sep 17 00:00:00 2001 From: "Corey J. 
Nolet" Date: Mon, 20 Apr 2020 11:14:24 -0400 Subject: [PATCH 244/330] Updates based on review feedback --- cpp/comms/std/src/ucp_helper.h | 26 ++++++++++++++++-------- cpp/src/umap/runner.h | 2 ++ python/cuml/test/test_trustworthiness.py | 2 +- 3 files changed, 21 insertions(+), 9 deletions(-) diff --git a/cpp/comms/std/src/ucp_helper.h b/cpp/comms/std/src/ucp_helper.h index ad69f856f9..6e60603a36 100644 --- a/cpp/comms/std/src/ucp_helper.h +++ b/cpp/comms/std/src/ucp_helper.h @@ -24,6 +24,17 @@ #pragma once +typedef void (*dlsym_print_info)(ucp_ep_h, FILE *); +typedef void (*dlsym_rec_free)(void *); +typedef int (*dlsym_worker_progress)(ucp_worker_h); + +typedef ucs_status_ptr_t (*dlsym_send)(ucp_ep_h, const void *, size_t, + ucp_datatype_t, ucp_tag_t, + ucp_send_callback_t); +typedef ucs_status_ptr_t (*dlsym_recv)(ucp_worker_h, void *, size_t count, + ucp_datatype_t datatype, ucp_tag_t, + ucp_tag_t, ucp_tag_recv_callback_t); + /** * Standard UCX request object that will be passed * around asynchronously. 
This object is really @@ -97,20 +108,19 @@ class comms_ucp_handler { private: void *ucp_handle; - ucs_status_ptr_t (*send_func)(ucp_ep_h, const void *, size_t, ucp_datatype_t, - ucp_tag_t, ucp_send_callback_t); - ucs_status_ptr_t (*recv_func)(ucp_worker_h, void *, size_t count, - ucp_datatype_t datatype, ucp_tag_t, ucp_tag_t, - ucp_tag_recv_callback_t); - void (*print_info_func)(ucp_ep_h, FILE *); - void (*req_free_func)(void *); - int (*worker_progress_func)(ucp_worker_h); + dlsym_print_info print_info_func; + dlsym_rec_free req_free_func; + dlsym_worker_progress worker_progress_func; + dlsym_send send_func; + dlsym_recv recv_func; + void load_ucp_handle() { ucp_handle = dlopen("libucp.so", RTLD_LAZY | RTLD_NOLOAD | RTLD_NODELETE); if (!ucp_handle) { ucp_handle = dlopen("libucp.so", RTLD_LAZY | RTLD_NODELETE); ASSERT(ucp_handle, "Cannot open UCX library: %s\n", dlerror()); } + // Reset any potential error dlerror(); } diff --git a/cpp/src/umap/runner.h b/cpp/src/umap/runner.h index e08b247417..95a31c4aab 100644 --- a/cpp/src/umap/runner.h +++ b/cpp/src/umap/runner.h @@ -426,6 +426,8 @@ void _transform(const cumlHandle &handle, T *X, int n, int d, params->callback->on_preprocess_end(transformed); } + params->initial_alpha /= 4.0; + SimplSetEmbedImpl::optimize_layout( transformed, n, embedding, embedding_n, comp_coo.rows(), comp_coo.cols(), comp_coo.nnz, epochs_per_sample.data(), n, params->repulsion_strength, diff --git a/python/cuml/test/test_trustworthiness.py b/python/cuml/test/test_trustworthiness.py index 866fd9bfdb..4e68afb096 100644 --- a/python/cuml/test/test_trustworthiness.py +++ b/python/cuml/test/test_trustworthiness.py @@ -25,7 +25,7 @@ @pytest.mark.parametrize('input_type', ['ndarray', 'dataframe']) -@pytest.mark.parametrize('n_samples', [10, 100, 500]) +@pytest.mark.parametrize('n_samples', [10, 500]) @pytest.mark.parametrize('batch_size', [512, 2]) @pytest.mark.parametrize('n_features', [10, 100]) @pytest.mark.parametrize('n_components', [2, 8]) 
From 364c4f7acdd13a66070b8c1645a88617a681c741 Mon Sep 17 00:00:00 2001 From: Simon Andersen Date: Mon, 20 Apr 2020 10:57:16 -0500 Subject: [PATCH 245/330] Update call to extract_arr_partitions with DistributedDataHandler --- python/cuml/dask/metrics/confusion_matrix.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/python/cuml/dask/metrics/confusion_matrix.py b/python/cuml/dask/metrics/confusion_matrix.py index 79e48666d6..98389c87a0 100644 --- a/python/cuml/dask/metrics/confusion_matrix.py +++ b/python/cuml/dask/metrics/confusion_matrix.py @@ -16,12 +16,13 @@ import numpy as np import cupy as cp -from cuml.dask.common import extract_arr_partitions +from cuml.dask.common.input_utils import DistributedDataHandler + +from cuml.dask.common.utils import get_client from cuml.utils.memory_utils import with_cupy_rmm from cuml.dask.metrics.utils import sorted_unique_labels from cuml.prims.label import make_monotonic -from dask.distributed import default_client @with_cupy_rmm @@ -82,7 +83,7 @@ def confusion_matrix(y_true, y_pred, C : array-like (device or host) shape = (n_classes, n_classes) Confusion matrix. """ - client = default_client() if client is None else client + client = get_client() if labels is None: labels = sorted_unique_labels(y_true, y_pred) @@ -97,9 +98,9 @@ def confusion_matrix(y_true, y_pred, [y_true, y_pred] # run cm computation on each partition. 
- parts = client.sync(extract_arr_partitions, dask_arrays) + data = DistributedDataHandler.create(dask_arrays, client=client) cms = [client.submit(_local_cm, p, labels, use_sample_weight, - workers=[w]).result() for w, p in parts] + workers=[w]).result() for w, p in data.gpu_futures] # reduce each partition's result into one cupy matrix cm = sum(cms) From 060e8c419086c95a2a2052e67c35eb8520adb4d8 Mon Sep 17 00:00:00 2001 From: Thejaswi Rao Date: Mon, 20 Apr 2020 11:26:40 -0700 Subject: [PATCH 246/330] BUG unit-tests from Simon which show a possible integer overflow issue with contingency matrix prim --- cpp/test/prims/contingencyMatrix.cu | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/cpp/test/prims/contingencyMatrix.cu b/cpp/test/prims/contingencyMatrix.cu index 1e89d15352..223ada44a7 100644 --- a/cpp/test/prims/contingencyMatrix.cu +++ b/cpp/test/prims/contingencyMatrix.cu @@ -137,10 +137,14 @@ class ContingencyMatrixTestImpl const std::vector inputs = { {10000, 1, 10, true, false, 0.000001}, + {10000, 1, 5000, true, false, 0.000001}, + {10000, 1, 10000, true, false, 0.000001}, + {10000, 1, 20000, true, false, 0.000001}, {100000, 1, 100, false, false, 0.000001}, {1000000, 1, 1200, true, false, 0.000001}, {1000000, 1, 10000, false, false, 0.000001}, - {100000, 1, 100, false, true, 0.000001}}; + {100000, 1, 100, false, true, 0.000001}, +}; typedef ContingencyMatrixTestImpl ContingencyMatrixTestImplS; TEST_P(ContingencyMatrixTestImplS, Result) { From da4c7976bcea966aef15d1d18731c1bc45480c98 Mon Sep 17 00:00:00 2001 From: Thejaswi Rao Date: Mon, 20 Apr 2020 11:27:22 -0700 Subject: [PATCH 247/330] DOC copyright year update --- cpp/test/prims/contingencyMatrix.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/test/prims/contingencyMatrix.cu b/cpp/test/prims/contingencyMatrix.cu index 223ada44a7..7bddf933b1 100644 --- a/cpp/test/prims/contingencyMatrix.cu +++ b/cpp/test/prims/contingencyMatrix.cu @@ -1,5 +1,5 @@ /* - * 
Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2019-2020, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. From 918b84c56b3e9e74d5631a05a9d92ec82e17524d Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Mon, 20 Apr 2020 16:59:26 -0500 Subject: [PATCH 248/330] FIX input_to_cuml_array cuda array interface bugfix --- python/cuml/utils/input_utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/cuml/utils/input_utils.py b/python/cuml/utils/input_utils.py index 9cd8369757..c2503e66d2 100644 --- a/python/cuml/utils/input_utils.py +++ b/python/cuml/utils/input_utils.py @@ -138,7 +138,8 @@ def input_to_cuml_array(X, order='F', deepcopy=False, elif order == 'C': X_m = CumlArray(data=cuml.utils.numba_utils.row_matrix(X)) - elif cuda.is_cuda_array(X) or isinstance(X, np.ndarray): + elif hasattr(X, "__array_interface__") or \ + hasattr(X, "__cuda_array_interface__"): X_m = CumlArray(data=X) if deepcopy: From f22b858a20eb26f85d50c1e55efd3a450cd5623c Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Mon, 20 Apr 2020 17:04:32 -0500 Subject: [PATCH 249/330] DOC Add entry to changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3b8d0b6667..2379af58dd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -46,6 +46,7 @@ - PR #2078: Ignore negative cache indices in get_vecs - PR #2084: Fixed cuda-memcheck errors with COO unit-tests - PR #2087: Fixed cuda-memcheck errors with dispersion prim +- PR #2109: input_to_cuml_array __cuda_array_interface__ bugfix # cuML 0.13.0 (Date TBD) From 3aa80c9c563bdc61977efebfcdb015f3a0253cc4 Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Mon, 20 Apr 2020 17:08:57 -0500 Subject: [PATCH 250/330] FIX Remove deep option from build.sh --- build.sh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/build.sh b/build.sh index 
8b9d539c84..3431c81c0d 100755 --- a/build.sh +++ b/build.sh @@ -18,7 +18,7 @@ ARGS=$* # script, and that this script resides in the repo dir! REPODIR=$(cd $(dirname $0); pwd) -VALIDARGS="clean libcuml cuml prims bench prims-bench -v -g -n --allgpuarch --singlegpu --nvtx --show_depr_warn -h --help --deep" +VALIDARGS="clean libcuml cuml prims bench prims-bench -v -g -n --allgpuarch --singlegpu --nvtx --show_depr_warn -h --help" HELP="$0 [ ...] [ ...] where is: clean - remove all existing build artifacts and configuration (start over) @@ -53,7 +53,6 @@ BUILD_ALL_GPU_ARCH=0 SINGLEGPU="" NVTX=OFF CLEAN=0 -DEEPCLEAN=0 BUILD_DISABLE_DEPRECATION_WARNING=ON # Set defaults for vars that may not have been defined externally From c06a04bee7d0bd556231305f26858e0ce92b1813 Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Mon, 20 Apr 2020 17:12:43 -0500 Subject: [PATCH 251/330] FIX singlegpu build_ext import fix --- python/setup.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/python/setup.py b/python/setup.py index 02143fe8c4..8f2c195440 100644 --- a/python/setup.py +++ b/python/setup.py @@ -30,13 +30,15 @@ import versioneer import warnings -try: - if "--singlegpu" in sys.argv: - from Cython.Build import cythonize - else: - from Cython.Distutils.build_ext import new_build_ext as build_ext -except ImportError: + +if "--singlegpu" in sys.argv: + from Cython.Build import cythonize from setuptools.command.build_ext import build_ext +else: + try: + from Cython.Distutils.build_ext import new_build_ext as build_ext + except ImportError: + from setuptools.command.build_ext import build_ext install_requires = [ 'numba', From cec629a17109a2f5952dcfb825f7f9869b494ef6 Mon Sep 17 00:00:00 2001 From: "Corey J. 
Nolet" Date: Mon, 20 Apr 2020 18:13:59 -0400 Subject: [PATCH 252/330] Further typedef of funcs --- cpp/comms/std/src/ucp_helper.h | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/cpp/comms/std/src/ucp_helper.h b/cpp/comms/std/src/ucp_helper.h index 6e60603a36..bf650dae50 100644 --- a/cpp/comms/std/src/ucp_helper.h +++ b/cpp/comms/std/src/ucp_helper.h @@ -130,34 +130,30 @@ class comms_ucp_handler { } void load_send_func() { - send_func = (ucs_status_ptr_t(*)( - ucp_ep_h, const void *, size_t, ucp_datatype_t, ucp_tag_t, - ucp_send_callback_t))dlsym(ucp_handle, "ucp_tag_send_nb"); + send_func = (dlsym_send)dlsym(ucp_handle, "ucp_tag_send_nb"); assert_dlerror(); } void load_free_req_func() { req_free_func = - (void (*)(void *request))dlsym(ucp_handle, "ucp_request_free"); + (dlsym_rec_free)dlsym(ucp_handle, "ucp_request_free"); assert_dlerror(); } void load_print_info_func() { print_info_func = - (void (*)(ucp_ep_h, FILE *))dlsym(ucp_handle, "ucp_ep_print_info"); + (dlsym_print_info)dlsym(ucp_handle, "ucp_ep_print_info"); assert_dlerror(); } void load_worker_progress_func() { worker_progress_func = - (int (*)(ucp_worker_h))dlsym(ucp_handle, "ucp_worker_progress"); + (dlsym_worker_progress)dlsym(ucp_handle, "ucp_worker_progress"); assert_dlerror(); } void load_recv_func() { - recv_func = (ucs_status_ptr_t(*)( - ucp_worker_h, void *, size_t, ucp_datatype_t, ucp_tag_t, ucp_tag_t, - ucp_tag_recv_callback_t))dlsym(ucp_handle, "ucp_tag_recv_nb"); + recv_func = (dlsym_recv)dlsym(ucp_handle, "ucp_tag_recv_nb"); assert_dlerror(); } From c452bd5d07d2dd3de377341bd9a7daf6d6e464a1 Mon Sep 17 00:00:00 2001 From: Simon Andersen Date: Mon, 20 Apr 2020 17:17:41 -0500 Subject: [PATCH 253/330] Revert "Fix whitespaces" This reverts commit 7b0c7304fea413bbe4ff51694b01329c1c80967a. 
--- python/cuml/preprocessing/encoders.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cuml/preprocessing/encoders.py b/python/cuml/preprocessing/encoders.py index 8a4c7d11ac..c98c76f0e9 100644 --- a/python/cuml/preprocessing/encoders.py +++ b/python/cuml/preprocessing/encoders.py @@ -43,7 +43,7 @@ class OneHotEncoder: categories : 'auto' an cupy.ndarray or a cudf.DataFrame, default='auto' Categories (unique values) per feature: - 'auto' : Determine categories automatically from the training data. - - DataFrame/Array : ``categories[col]`` holds the categories + - DataFrame/Array : ``categories[col]`` holds the categories expected in the feature col. drop : 'first', None, a dict or a list, default=None Specifies a methodology to use to drop one of the categories per From c3c50b843447e2c6a0dc6c9e64c17996bf6605b6 Mon Sep 17 00:00:00 2001 From: Simon Andersen Date: Mon, 20 Apr 2020 17:17:46 -0500 Subject: [PATCH 254/330] Revert "Fix coding style" This reverts commit e14567a1f65a64291d3c817fbc5eb7c1e9b02e04. --- python/cuml/preprocessing/encoders.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/cuml/preprocessing/encoders.py b/python/cuml/preprocessing/encoders.py index c98c76f0e9..8a2d99ba47 100644 --- a/python/cuml/preprocessing/encoders.py +++ b/python/cuml/preprocessing/encoders.py @@ -43,8 +43,8 @@ class OneHotEncoder: categories : 'auto' an cupy.ndarray or a cudf.DataFrame, default='auto' Categories (unique values) per feature: - 'auto' : Determine categories automatically from the training data. - - DataFrame/Array : ``categories[col]`` holds the categories - expected in the feature col. + - DataFrame/Array : ``categories[col]`` holds the categories expected in the + feature col. drop : 'first', None, a dict or a list, default=None Specifies a methodology to use to drop one of the categories per feature. 
This is useful in situations where perfectly collinear From 772fa4892a2a928d29b6c70d0ab65e95e3b20c81 Mon Sep 17 00:00:00 2001 From: Simon Andersen Date: Mon, 20 Apr 2020 17:17:48 -0500 Subject: [PATCH 255/330] Revert "Update documentation for input as array" This reverts commit c53dab05437fe7c37ba7b0a68a92d7d64152a678. --- python/cuml/preprocessing/encoders.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/python/cuml/preprocessing/encoders.py b/python/cuml/preprocessing/encoders.py index 8a2d99ba47..bd25843dba 100644 --- a/python/cuml/preprocessing/encoders.py +++ b/python/cuml/preprocessing/encoders.py @@ -40,12 +40,12 @@ class OneHotEncoder: Parameters ---------- - categories : 'auto' an cupy.ndarray or a cudf.DataFrame, default='auto' + categories : 'auto' or a cudf.DataFrame, default='auto' Categories (unique values) per feature: - 'auto' : Determine categories automatically from the training data. - - DataFrame/Array : ``categories[col]`` holds the categories expected in the + - DataFrame : ``categories[col]`` holds the categories expected in the feature col. - drop : 'first', None, a dict or a list, default=None + drop : 'first', None or a dict, default=None Specifies a methodology to use to drop one of the categories per feature. This is useful in situations where perfectly collinear features cause problems, such as when feeding the resulting data @@ -53,7 +53,7 @@ class OneHotEncoder: - None : retain all features (the default). - 'first' : drop the first category in each feature. If only one category is present, the feature will be dropped entirely. - - dict/list : ``drop[col]`` is the category in feature col that + - Dict : ``drop[col]`` is the category in feature col that should be dropped. sparse : bool, default=False This feature was deactivated and will give an exception when True. 
@@ -120,7 +120,6 @@ def _take_feature(self, collection, key): return collection[:, key] def _compute_drop_idx(self): - """Helper to compute indices to drop from category to drop""" if self.drop is None: return None elif isinstance(self.drop, str) and self.drop == 'first': @@ -254,7 +253,7 @@ def fit(self, X): Fit OneHotEncoder to X. Parameters ---------- - X : cuDF.DataFrame or cupy.ndarray + X : cuDF.DataFrame The data to determine the categories of each feature. Returns ------- @@ -283,7 +282,7 @@ def fit_transform(self, X): Parameters ---------- - X : cudf.DataFrame or cupy.ndarray + X : cudf.DataFrame The data to encode. Returns ------- @@ -298,7 +297,7 @@ def transform(self, X): Transform X using one-hot encoding. Parameters ---------- - X : cudf.DataFrame or cupy.ndarray + X : cudf.DataFrame The data to encode. Returns ------- From 67aa19ea2a85207d644ded3bae2f3f9a474d650c Mon Sep 17 00:00:00 2001 From: Simon Andersen Date: Mon, 20 Apr 2020 17:18:39 -0500 Subject: [PATCH 256/330] Revert "Tests with cupy input for all features" This reverts commit 492c8798e5e8b4eda424d97496e9f85b897e9cfe. 
--- python/cuml/preprocessing/encoders.py | 215 ++++++----------------- python/cuml/test/test_one_hot_encoder.py | 157 ++++------------- 2 files changed, 87 insertions(+), 285 deletions(-) diff --git a/python/cuml/preprocessing/encoders.py b/python/cuml/preprocessing/encoders.py index bd25843dba..0db7698071 100644 --- a/python/cuml/preprocessing/encoders.py +++ b/python/cuml/preprocessing/encoders.py @@ -112,141 +112,47 @@ def _check_is_fitted(self): "with appropriate arguments before using this estimator.") raise NotFittedError(msg) - def _take_feature(self, collection, key): - """Helper to handle both df and array as input""" - if self.input_type == 'df': - return collection[key] - else: - return collection[:, key] - def _compute_drop_idx(self): if self.drop is None: return None elif isinstance(self.drop, str) and self.drop == 'first': - if self.input_type == 'df': - return {feature: 0 for feature in self._encoders.keys()} - else: - return cp.zeros(shape=(len(self._encoders),), dtype=cp.int32) - elif isinstance(self.drop, (dict, list)): - if self.input_type == 'df': - drop_columns = self.drop.keys() - drop_idx = dict() - make_collection, get_size = Series, len - else: - drop_columns = range(len(self.drop)) - drop_idx = cp.empty(shape=(len(drop_columns),), dtype=cp.int32) - make_collection, get_size = cp.array, cp.size - - if len(drop_columns) != len(self._encoders): + return {feature: 0 for feature in self._encoders.keys()} + elif isinstance(self.drop, dict): + if len(self.drop.keys()) != len(self._encoders): msg = ("`drop` should have as many columns as the number " "of features ({}), got {}") raise ValueError(msg.format(len(self._encoders), - len(drop_columns))) - for feature in drop_columns: - drop_feature = make_collection(self.drop[feature]) - if get_size(drop_feature) != 1: + len(self.drop.keys()))) + drop_idx = dict() + for feature in self.drop.keys(): + self.drop[feature] = Series(self.drop[feature]) + if len(self.drop[feature]) != 1: msg = ("Trying to 
drop multiple values for feature {}, " "this is not supported.").format(feature) raise ValueError(msg) cats = self._encoders[feature].classes_ - if not self.isin(drop_feature, cats).all(): + if not self.drop[feature].isin(cats).all(): msg = ("Some categories for feature {} were supposed " "to be dropped, but were not found in the encoder " "categories.".format(feature)) raise ValueError(msg) - idx = self.isin(cats, drop_feature) cats = Series(cats) - idx_val = cats[idx].index.values - if self.input_type == 'array': - idx_val = idx_val[0] - drop_idx[feature] = idx_val + idx = cats.isin(self.drop[feature]) + drop_idx[feature] = cp.asarray(cats[idx].index) return drop_idx else: msg = ("Wrong input for parameter `drop`. Expected " - "'first', None, a dict or a list, got {}") + "'first', None or a dict, got {}") raise ValueError(msg.format(type(self.drop))) - @property - def categories_(self): - """ - Returns categories used for the one hot encoding in the order used by - transform. - """ - return [self._encoders[f].classes_ for f in self._features] - - def _set_input_type(self, X): - if isinstance(X, cp.ndarray): - self.input_type = 'array' - self.isin = cp.isin - elif isinstance(X, DataFrame): - self.input_type = 'df' - self.isin = lambda a, b: Series(a).isin(b) - else: - raise TypeError( - 'Expected input to be cupy.ndarray or cudf.DataFrame, ' - 'got {}'.format(type(X))) - - class _ArrayEncoder: - """Helper for OneHotEncoder. - - This simplified LabelEncoder reflect the same interface - but using cp.arrays instead of cudf.Series internally. + def get_categories_(self): """ + Returns categories used for the one hot encoding in the correct order. 
- def __init__(self, handle_unknown='error'): - self.classes_ = None - self.handle_unknown = handle_unknown - - def fit(self, X): - self.classes_ = cp.unique(X) - return self - - def transform(self, X): - sorted_index = cp.searchsorted(self.classes_, X) - - xindex = cp.take(cp.arange(len(self.classes_)), sorted_index) - mask = self.classes_[xindex] != X - - if mask.any(): - if self.handle_unknown == 'error': - raise KeyError("Attempted to encode unseen key") - else: - xindex[mask] = -1 - - return xindex - - def _fit_encoders(self, X, categories=None): + This copies the categories to the CPU and should only be used to check + the order of the categories. """ - Helper to reduce code duplication in fit method - """ - fit_from_categories = categories is not None - _X = categories if fit_from_categories else X - - if self.input_type == 'df': - _encoders = dict() - def append(d, k, v): d[k] = v - Encoder = LabelEncoder - self._features = X.columns - else: - _encoders = list() - def append(l, _, v): l.append(v) - Encoder = self._ArrayEncoder - # used as indices for a list, no need to use a gpu array here - self._features = np.arange(0, _X.shape[1], dtype=cp.int32) - - for feature in self._features: - le = Encoder(handle_unknown=self.handle_unknown) - x_feature = self._take_feature(_X, feature) - append(_encoders, feature, le.fit(x_feature)) - - if fit_from_categories and self.handle_unknown == 'error': - x_categories = x_feature - if not self.isin(self._take_feature(X, feature), - x_categories).all(): - msg = ("Found unknown categories in column {0}" - " during fit".format(feature)) - raise KeyError(msg) - return _encoders + return [self._encoders[f].classes_.to_array() for f in self._features] def fit(self, X): """ @@ -260,16 +166,24 @@ def fit(self, X): self """ self._validate_keywords() - - self._set_input_type(X) - if type(self.categories) is str and self.categories == 'auto': - self._encoders = self._fit_encoders(X) + self._features = X.columns + self._encoders = { + 
feature: LabelEncoder(handle_unknown=self.handle_unknown).fit( + X[feature]) + for feature in X.columns + } else: - _categories = self.categories - if self.input_type == 'array': - _categories = _categories.transpose() # same format as X - self._encoders = self._fit_encoders(X, categories=_categories) + self._features = self.categories.columns + self._encoders = dict() + for feature in self.categories.columns: + le = LabelEncoder(handle_unknown=self.handle_unknown) + self._encoders[feature] = le.fit(self.categories[feature]) + if self.handle_unknown == 'error': + if not X[feature].isin(self.categories[feature]).all(): + msg = ("Found unknown categories in column {0}" + " during fit".format(feature)) + raise KeyError(msg) self.drop_idx_ = self._compute_drop_idx() self._fitted = True @@ -308,13 +222,10 @@ def transform(self, X): cols, rows = list(), list() j = 0 - for feature in self._features: + for feature in X.columns: encoder = self._encoders[feature] - - col_idx = encoder.transform(self._take_feature(X, feature)) - if self.input_type == 'df': - col_idx = cp.asarray(col_idx.to_gpu_array(fillna="pandas")) - + col_idx = encoder.transform(X[feature]) + col_idx = cp.asarray(col_idx.to_gpu_array(fillna="pandas")) idx_to_keep = col_idx > -1 # increase indices to take previous features into account @@ -367,68 +278,44 @@ def inverse_transform(self, X): """ self._check_is_fitted() if cp.sparse.issparse(X): + # cupy.sparse 7.x does not support argmax, when we upgrade cupy to + # 8.x, we should add a condition in the + # if close: `and cp.sparse.issparsecsc(X)` + # and change the following line by `X = X.tocsc()` X = X.toarray() - - if self.input_type == 'df': - result = DataFrame(columns=self._features) - def add_result_column(res, key, col): res[key] = col - - def dropped_1cat_inverse(value): - return Series(GenericIndex(value).repeat(X.shape[0])) - - def drop_inverse(enc, drop_index): - return enc.inverse_transform(Series(drop_index))[0] - else: - result = 
cp.empty(shape=(len(X), len(self._features))) - def add_result_column(res, key, col): res[:, key] = col - - def dropped_1cat_inverse(value): - return cp.full(len(X), value.item(), dtype=self.dtype) - - def drop_inverse(enc, drop_index): - return enc.classes_[drop_index] - + result = DataFrame(columns=self._encoders.keys()) j = 0 - for feature in self._features: + for feature in self._encoders.keys(): feature_enc = self._encoders[feature] cats = feature_enc.classes_ if self.drop is not None: # Remove dropped categories - drop_idx = self.drop_idx_[feature] - dropped_class_mask = self.isin(cats, cats[drop_idx]) + dropped_class_idx = Series(self.drop_idx_[feature]) + dropped_class_mask = Series(cats).isin(cats[dropped_class_idx]) if len(cats) == 1: - # if there is only one category and we drop it, then we - # know that the full inverse column is this category - inv = dropped_1cat_inverse(cats[0]) - add_result_column(result, feature, inv) + inv = Series(GenericIndex(cats[0]).repeat(X.shape[0])) + result[feature] = inv continue cats = cats[~dropped_class_mask] enc_size = len(cats) x_feature = X[:, j:j + enc_size] idx = cp.argmax(x_feature, axis=1) - inv = cats[idx] - if self.input_type == 'df': - inv = Series(cats[idx]).reset_index(drop=True) + inv = Series(cats[idx]).reset_index(drop=True) if self.handle_unknown == 'ignore': not_null_idx = x_feature.any(axis=1) - if not_null_idx.any(): - if self.input_type == 'array': - raise ValueError('Found an unknown category during ' - 'inverse_transform, which is not ' - 'supported with cupy arrays') - inv[~not_null_idx] = None + inv.iloc[~not_null_idx] = None elif self.drop is not None: # drop will either be None or handle_unknown will be error. 
If # self.drop is not None, then we can safely assume that all of # the nulls in each column are the dropped value dropped_mask = cp.asarray(x_feature.sum(axis=1) == 0).flatten() if dropped_mask.any(): - drop_idx = self.drop_idx_[feature] - inv[dropped_mask] = drop_inverse(feature_enc, drop_idx) + inv[dropped_mask] = feature_enc.inverse_transform( + Series(self.drop_idx_[feature]))[0] - add_result_column(result, feature, inv) + result[feature] = inv j += enc_size return result diff --git a/python/cuml/test/test_one_hot_encoder.py b/python/cuml/test/test_one_hot_encoder.py index 3ad0900bc9..dbcb730a41 100644 --- a/python/cuml/test/test_one_hot_encoder.py +++ b/python/cuml/test/test_one_hot_encoder.py @@ -29,58 +29,23 @@ def _from_df_to_array(df): return list(zip(*[df[feature] for feature in df.columns])) -def _from_df_to_cupy(df): - # transform char columns to integer columns - for col in df.columns: - if not np.issubdtype(df[col].dtype, np.number): - df[col] = [ord(c) for c in df[col]] - return cp.array(_from_df_to_array(df)) - - -def _convert_drop(drop): - if drop is None or drop == 'first': - return drop - return [ord(x) if isinstance(x, str) else x for x in drop.values()] - - def _generate_inputs_from_categories(categories=None, - n_samples=10, - seed=5060, - as_array=False): + n_samples=10, seed=5060): if categories is None: - if as_array: - categories = {'strings': list(range(1000, 4000, 3)), - 'integers': list(range(1000))} - else: - categories = {'strings': ['Foo', 'Bar', 'Baz'], - 'integers': list(range(1000))} + categories = {'strings': ['Foo', 'Bar', 'Baz'], + 'integers': list(range(1000))} rd = np.random.RandomState(seed) pandas_df = pd.DataFrame({name: rd.choice(cat, n_samples) for name, cat in categories.items()}) ary = _from_df_to_array(pandas_df) - if as_array: - inp_ary = cp.array(ary) - return inp_ary, ary - else: - df = DataFrame.from_pandas(pandas_df) - return df, ary - - -def assert_inverse_equal(ours, ref): - if isinstance(ours, cp.ndarray): - 
cp.testing.assert_array_equal(ours, ref) - else: - assert_frame_equal(ours.to_pandas(), ref.to_pandas()) + df = DataFrame.from_pandas(pandas_df) + return df, ary -@pytest.mark.parametrize('as_array', [True, False], ids=['cupy', 'cudf']) -def test_onehot_vs_skonehot(as_array): - X = DataFrame({'gender': ['M', 'F', 'F'], 'int': [1, 3, 2]}) +def test_onehot_vs_skonehot(): + X = DataFrame({'gender': ['Male', 'Female', 'Female'], 'int': [1, 3, 2]}) skX = _from_df_to_array(X) - if as_array: - X = _from_df_to_cupy(X) - skX = cp.asnumpy(X) enc = OneHotEncoder(sparse=True) skohe = SkOneHotEncoder(sparse=True) @@ -94,42 +59,31 @@ def test_onehot_vs_skonehot(as_array): @pytest.mark.parametrize('drop', [None, 'first', {'g': 'F', 'i': 3}]) -@pytest.mark.parametrize('as_array', [True, False], ids=['cupy', 'cudf']) -def test_onehot_inverse_transform(drop, as_array): +def test_onehot_inverse_transform(drop): X = DataFrame({'g': ['M', 'F', 'F'], 'i': [1, 3, 2]}) - if as_array: - X = _from_df_to_cupy(X) - drop = _convert_drop(drop) enc = OneHotEncoder(drop=drop) ohe = enc.fit_transform(X) inv = enc.inverse_transform(ohe) - assert_inverse_equal(inv, X) + assert_frame_equal(inv.to_pandas(), X.to_pandas()) -@pytest.mark.parametrize('as_array', [True, False], ids=['cupy', 'cudf']) -def test_onehot_categories(as_array): +def test_onehot_categories(): X = DataFrame({'chars': ['a', 'b'], 'int': [0, 2]}) - categories = DataFrame({'chars': ['a', 'b', 'c'], 'int': [0, 1, 2]}) - if as_array: - X = _from_df_to_cupy(X) - categories = _from_df_to_cupy(categories).transpose() - - enc = OneHotEncoder(categories=categories, sparse=False) + enc = OneHotEncoder( + categories=DataFrame({'chars': ['a', 'b', 'c'], 'int': [0, 1, 2]}), + sparse=False + ) ref = cp.array([[1., 0., 0., 1., 0., 0.], [0., 1., 0., 0., 0., 1.]]) res = enc.fit_transform(X) cp.testing.assert_array_equal(res, ref) -@pytest.mark.parametrize('as_array', [True, False], ids=['cupy', 'cudf']) -def 
test_onehot_fit_handle_unknown(as_array): +def test_onehot_fit_handle_unknown(): X = DataFrame({'chars': ['a', 'b'], 'int': [0, 2]}) Y = DataFrame({'chars': ['c', 'b'], 'int': [0, 2]}) - if as_array: - X = _from_df_to_cupy(X) - Y = _from_df_to_cupy(Y) enc = OneHotEncoder(handle_unknown='error', categories=Y) with pytest.raises(KeyError): @@ -139,13 +93,9 @@ def test_onehot_fit_handle_unknown(as_array): enc.fit(X) -@pytest.mark.parametrize('as_array', [True, False], ids=['cupy', 'cudf']) -def test_onehot_transform_handle_unknown(as_array): +def test_onehot_transform_handle_unknown(): X = DataFrame({'chars': ['a', 'b'], 'int': [0, 2]}) Y = DataFrame({'chars': ['c', 'b'], 'int': [0, 2]}) - if as_array: - X = _from_df_to_cupy(X) - Y = _from_df_to_cupy(Y) enc = OneHotEncoder(handle_unknown='error', sparse=False) enc = enc.fit(X) @@ -160,36 +110,27 @@ def test_onehot_transform_handle_unknown(as_array): cp.testing.assert_array_equal(ohe, ref) -@pytest.mark.parametrize('as_array', [True, False], ids=['cupy', 'cudf']) -def test_onehot_inverse_transform_handle_unknown(as_array): +def test_onehot_inverse_transform_handle_unknown(): X = DataFrame({'chars': ['a', 'b'], 'int': [0, 2]}) Y_ohe = cp.array([[0., 0., 1., 0.], [0., 1., 0., 1.]]) - if as_array: - X = _from_df_to_cupy(X) enc = OneHotEncoder(handle_unknown='ignore') enc = enc.fit(X) - if as_array: - with pytest.raises(ValueError): - enc.inverse_transform(Y_ohe) - else: - df = enc.inverse_transform(Y_ohe) - ref = DataFrame({'chars': [None, 'b'], 'int': [0, 2]}) - assert_inverse_equal(df, ref) + df = enc.inverse_transform(Y_ohe) + ref = DataFrame({'chars': [None, 'b'], 'int': [0, 2]}) + assert_frame_equal(df.to_pandas(), ref.to_pandas()) @pytest.mark.parametrize('drop', [None, 'first']) @pytest.mark.parametrize('sparse', [True, False], ids=['sparse', 'dense']) @pytest.mark.parametrize("n_samples", [10, 1000, 20000, stress_param(250000)]) -@pytest.mark.parametrize('as_array', [True, False], ids=['cupy', 'cudf']) -def 
test_onehot_random_inputs(drop, sparse, n_samples, as_array): - X, ary = _generate_inputs_from_categories(n_samples=n_samples, - as_array=as_array) +def test_onehot_random_inputs(drop, sparse, n_samples): + df, ary = _generate_inputs_from_categories(n_samples=n_samples) enc = OneHotEncoder(sparse=sparse, drop=drop) sk_enc = SkOneHotEncoder(sparse=sparse, drop=drop) - ohe = enc.fit_transform(X) + ohe = enc.fit_transform(df) ref = sk_enc.fit_transform(ary) if sparse: cp.testing.assert_array_equal(ohe.toarray(), ref.toarray()) @@ -198,17 +139,13 @@ def test_onehot_random_inputs(drop, sparse, n_samples, as_array): inv_ohe = enc.inverse_transform(ohe) - assert_inverse_equal(inv_ohe, X) + assert_frame_equal(inv_ohe.to_pandas(), df.to_pandas()) -@pytest.mark.parametrize('as_array', [True, False], ids=['cupy', 'cudf']) -def test_onehot_drop_idx_first(as_array): +def test_onehot_drop_idx_first(): X_ary = [['c', 2, 'a'], ['b', 2, 'b']] X = DataFrame({'chars': ['c', 'b'], 'int': [2, 2], 'letters': ['a', 'b']}) - if as_array: - X = _from_df_to_cupy(X) - X_ary = cp.asnumpy(X) enc = OneHotEncoder(sparse=False, drop='first') sk_enc = SkOneHotEncoder(sparse=False, drop='first') @@ -216,27 +153,18 @@ def test_onehot_drop_idx_first(as_array): ref = sk_enc.fit_transform(X_ary) cp.testing.assert_array_equal(ohe, ref) inv = enc.inverse_transform(ohe) - assert_inverse_equal(inv, X) + assert_frame_equal(inv.to_pandas(), X.to_pandas()) -@pytest.mark.parametrize('as_array', [True, False], ids=['cupy', 'cudf']) -def test_onehot_drop_one_of_each(as_array): +def test_onehot_drop_one_of_each(): X = DataFrame({'chars': ['c', 'b'], 'int': [2, 2], 'letters': ['a', 'b']}) drop = dict({'chars': 'b', 'int': 2, 'letters': 'b'}) - X_ary = _from_df_to_array(X) - drop_ary = ['b', 2, 'b'] - if as_array: - X = _from_df_to_cupy(X) - X_ary = cp.asnumpy(X) - drop = drop_ary = _convert_drop(drop) - enc = OneHotEncoder(sparse=False, drop=drop) ohe = enc.fit_transform(X) - print(ohe.dtype) - ref = 
SkOneHotEncoder(sparse=False, drop=drop_ary).fit_transform(X_ary) + ref = SkOneHotEncoder(sparse=False, drop=['b', 2, 'b']).fit_transform(X) cp.testing.assert_array_equal(ohe, ref) inv = enc.inverse_transform(ohe) - assert_inverse_equal(inv, X) + assert_frame_equal(inv.to_pandas(), X.to_pandas()) @pytest.mark.parametrize("drop, pattern", @@ -245,46 +173,33 @@ def test_onehot_drop_one_of_each(as_array): [dict({'chars': 'b', 'int': [2, 0]}), 'Trying to drop multiple values'], [dict({'chars': 'b', 'int': 3}), - 'Some categories [0-9a-zA-Z, ]* were not found'], + 'Some categories [a-zA-Z, ]* were not found'], [DataFrame({'chars': 'b', 'int': 3}), 'Wrong input for parameter `drop`.']]) -@pytest.mark.parametrize('as_array', [True, False], ids=['cupy', 'cudf']) -def test_onehot_drop_exceptions(drop, pattern, as_array): +def test_onehot_drop_exceptions(drop, pattern): X = DataFrame({'chars': ['c', 'b', 'd'], 'int': [2, 1, 0]}) - if as_array: - X = _from_df_to_cupy(X) - drop = _convert_drop(drop) if not isinstance(drop, DataFrame) else drop with pytest.raises(ValueError, match=pattern): OneHotEncoder(sparse=False, drop=drop).fit(X) -@pytest.mark.parametrize('as_array', [True, False], ids=['cupy', 'cudf']) -def test_onehot_get_categories(as_array): +def test_onehot_get_categories(): X = DataFrame({'chars': ['c', 'b', 'd'], 'ints': [2, 1, 0]}) - ref = [np.array(['b', 'c', 'd']), np.array([0, 1, 2])] - if as_array: - X = _from_df_to_cupy(X) - ref[0] = np.array([ord(x) for x in ref[0]]) + ref = [np.array(['b', 'c', 'd']), np.array([0, 1, 2])] enc = OneHotEncoder().fit(X) - cats = enc.categories_ + cats = enc.get_categories_() for i in range(len(ref)): - cp.testing.assert_array_equal(ref[i], cats[i]) + np.testing.assert_array_equal(ref[i], cats[i]) -@pytest.mark.parametrize('as_array', [True, False], ids=['cupy', 'cudf']) -def test_onehot_sparse_drop(as_array): +def test_onehot_sparse_drop(): X = DataFrame({'g': ['M', 'F', 'F'], 'i': [1, 3, 2], 'l': [5, 5, 6]}) drop = {'g': 'F', 
'i': 3, 'l': 6} ary = _from_df_to_array(X) drop_ary = ['F', 3, 6] - if as_array: - X = _from_df_to_cupy(X) - ary = cp.asnumpy(X) - drop = drop_ary = _convert_drop(drop) enc = OneHotEncoder(sparse=True, drop=drop) sk_enc = SkOneHotEncoder(sparse=True, drop=drop_ary) From 3a86fa911d0c2de41b69e5dcd1e48d556fc76e91 Mon Sep 17 00:00:00 2001 From: Simon Andersen Date: Mon, 20 Apr 2020 17:52:23 -0500 Subject: [PATCH 257/330] Add support for arrays by converting input arrays to DataFrame --- python/cuml/preprocessing/encoders.py | 65 ++++++++++++++++++++------- 1 file changed, 50 insertions(+), 15 deletions(-) diff --git a/python/cuml/preprocessing/encoders.py b/python/cuml/preprocessing/encoders.py index 0db7698071..bc5937848c 100644 --- a/python/cuml/preprocessing/encoders.py +++ b/python/cuml/preprocessing/encoders.py @@ -21,6 +21,7 @@ from cudf.core import GenericIndex from cuml.utils import with_cupy_rmm +import warnings class OneHotEncoder: @@ -40,12 +41,12 @@ class OneHotEncoder: Parameters ---------- - categories : 'auto' or a cudf.DataFrame, default='auto' + categories : 'auto' an cupy.ndarray or a cudf.DataFrame, default='auto' Categories (unique values) per feature: - 'auto' : Determine categories automatically from the training data. - - DataFrame : ``categories[col]`` holds the categories expected in the - feature col. - drop : 'first', None or a dict, default=None + - DataFrame/ndarray : ``categories[col]`` holds the categories expected + in the feature col. + drop : 'first', None, a dict or a list, default=None Specifies a methodology to use to drop one of the categories per feature. This is useful in situations where perfectly collinear features cause problems, such as when feeding the resulting data @@ -53,7 +54,7 @@ class OneHotEncoder: - None : retain all features (the default). - 'first' : drop the first category in each feature. If only one category is present, the feature will be dropped entirely. 
- - Dict : ``drop[col]`` is the category in feature col that + - dict/list : ``drop[col]`` is the category in feature col that should be dropped. sparse : bool, default=False This feature was deactivated and will give an exception when True. @@ -88,6 +89,7 @@ def __init__(self, categories='auto', drop=None, sparse=True, self.drop_idx_ = None self._features = None self._encoders = None + self.input_type = None if sparse and np.dtype(dtype) not in ['f', 'd', 'F', 'D']: raise ValueError('Only float32, float64, complex64 and complex128 ' 'are supported when using sparse') @@ -113,11 +115,14 @@ def _check_is_fitted(self): raise NotFittedError(msg) def _compute_drop_idx(self): + """Helper to compute indices to drop from category to drop""" if self.drop is None: return None elif isinstance(self.drop, str) and self.drop == 'first': return {feature: 0 for feature in self._encoders.keys()} - elif isinstance(self.drop, dict): + elif isinstance(self.drop, (dict, list)): + if isinstance(self.drop, list): + self.drop = dict(zip(range(len(self.drop)), self.drop)) if len(self.drop.keys()) != len(self._encoders): msg = ("`drop` should have as many columns as the number " "of features ({}), got {}") @@ -145,27 +150,43 @@ def _compute_drop_idx(self): "'first', None or a dict, got {}") raise ValueError(msg.format(type(self.drop))) - def get_categories_(self): + @property + def categories_(self): """ Returns categories used for the one hot encoding in the correct order. + """ + return [self._encoders[f].classes_ for f in self._features] + + def _set_input_type(self, value): + if self.input_type is None: + self.input_type = value - This copies the categories to the CPU and should only be used to check - the order of the categories. 
+ def _check_input(self, X, is_categories=False): + """ + If input is cupy, convert it to a DataFrame with 0 copies """ - return [self._encoders[f].classes_.to_array() for f in self._features] + if isinstance(X, cp.ndarray): + self._set_input_type('array') + if is_categories: + X = X.transpose() + return DataFrame.from_gpu_matrix(X) + else: + self._set_input_type('df') + return X def fit(self, X): """ Fit OneHotEncoder to X. Parameters ---------- - X : cuDF.DataFrame + X : cuDF.DataFrame or cupy.ndarray The data to determine the categories of each feature. Returns ------- self """ self._validate_keywords() + X = self._check_input(X) if type(self.categories) is str and self.categories == 'auto': self._features = X.columns self._encoders = { @@ -174,6 +195,7 @@ def fit(self, X): for feature in X.columns } else: + self.categories = self._check_input(self.categories, True) self._features = self.categories.columns self._encoders = dict() for feature in self.categories.columns: @@ -196,13 +218,14 @@ def fit_transform(self, X): Parameters ---------- - X : cudf.DataFrame + X : cudf.DataFrame or cupy.ndarray The data to encode. Returns ------- X_out : sparse matrix if sparse=True else a 2-d array Transformed input. """ + X = self._check_input(X) return self.fit(X).transform(X) @with_cupy_rmm @@ -211,7 +234,7 @@ def transform(self, X): Transform X using one-hot encoding. Parameters ---------- - X : cudf.DataFrame + X : cudf.DataFrame or cupy.ndarray The data to encode. Returns ------- @@ -219,6 +242,7 @@ def transform(self, X): Transformed input. """ self._check_is_fitted() + X = self._check_input(X) cols, rows = list(), list() j = 0 @@ -267,20 +291,23 @@ def inverse_transform(self, X): Convert the data back to the original representation. In case unknown categories are encountered (all zeros in the one-hot encoding), ``None`` is used to represent this category. 
+ + The return type is the same as the type of the input used by the first + call to fit on this estimator instance. Parameters ---------- X : array-like or sparse matrix, shape [n_samples, n_encoded_features] The transformed data. Returns ------- - X_tr : cudf.DataFrame + X_tr : cudf.DataFrame or cupy.ndarray Inverse transformed array. """ self._check_is_fitted() if cp.sparse.issparse(X): # cupy.sparse 7.x does not support argmax, when we upgrade cupy to # 8.x, we should add a condition in the - # if close: `and cp.sparse.issparsecsc(X)` + # if close: `and not cp.sparse.issparsecsc(X)` # and change the following line by `X = X.tocsc()` X = X.toarray() result = DataFrame(columns=self._encoders.keys()) @@ -318,4 +345,12 @@ def inverse_transform(self, X): result[feature] = inv j += enc_size + if self.input_type == 'array': + try: + result = cp.asarray(result.as_gpu_matrix()) + except ValueError: + warnings.warn("The input one hot encoding contains rows with " + "unknown categories. Arrays do not support null " + "values. 
Returning output as a DataFrame " + "instead.") return result From 5f090a2f42d5b56ccf99695b5a726c507873fa82 Mon Sep 17 00:00:00 2001 From: Simon Andersen Date: Mon, 20 Apr 2020 17:52:58 -0500 Subject: [PATCH 258/330] Add tests for cp.ndarrays as input --- python/cuml/test/test_one_hot_encoder.py | 154 +++++++++++++++++------ 1 file changed, 117 insertions(+), 37 deletions(-) diff --git a/python/cuml/test/test_one_hot_encoder.py b/python/cuml/test/test_one_hot_encoder.py index dbcb730a41..8f96cf8bad 100644 --- a/python/cuml/test/test_one_hot_encoder.py +++ b/python/cuml/test/test_one_hot_encoder.py @@ -29,23 +29,58 @@ def _from_df_to_array(df): return list(zip(*[df[feature] for feature in df.columns])) +def _from_df_to_cupy(df): + """Transform char columns to integer columns, and then create an array""" + for col in df.columns: + if not np.issubdtype(df[col].dtype, np.number): + df[col] = [ord(c) for c in df[col]] + return cp.array(_from_df_to_array(df)) + + +def _convert_drop(drop): + if drop is None or drop == 'first': + return drop + return [ord(x) if isinstance(x, str) else x for x in drop.values()] + + def _generate_inputs_from_categories(categories=None, - n_samples=10, seed=5060): + n_samples=10, + seed=5060, + as_array=False): if categories is None: - categories = {'strings': ['Foo', 'Bar', 'Baz'], - 'integers': list(range(1000))} + if as_array: + categories = {'strings': list(range(1000, 4000, 3)), + 'integers': list(range(1000))} + else: + categories = {'strings': ['Foo', 'Bar', 'Baz'], + 'integers': list(range(1000))} rd = np.random.RandomState(seed) pandas_df = pd.DataFrame({name: rd.choice(cat, n_samples) for name, cat in categories.items()}) ary = _from_df_to_array(pandas_df) - df = DataFrame.from_pandas(pandas_df) - return df, ary + if as_array: + inp_ary = cp.array(ary) + return inp_ary, ary + else: + df = DataFrame.from_pandas(pandas_df) + return df, ary -def test_onehot_vs_skonehot(): - X = DataFrame({'gender': ['Male', 'Female', 'Female'], 'int': 
[1, 3, 2]}) +def assert_inverse_equal(ours, ref): + if isinstance(ours, cp.ndarray): + cp.testing.assert_array_equal(ours, ref) + else: + assert_frame_equal(ours.to_pandas(), ref.to_pandas()) + + +@pytest.mark.parametrize('as_array', [True, False], ids=['cupy', 'cudf']) +def test_onehot_vs_skonehot(as_array): + X = DataFrame({'gender': ['M', 'F', 'F'], 'int': [1, 3, 2]}) skX = _from_df_to_array(X) + if as_array: + X = _from_df_to_cupy(X) + skX = cp.asnumpy(X) enc = OneHotEncoder(sparse=True) skohe = SkOneHotEncoder(sparse=True) @@ -59,31 +94,42 @@ def test_onehot_vs_skonehot(): @pytest.mark.parametrize('drop', [None, 'first', {'g': 'F', 'i': 3}]) -def test_onehot_inverse_transform(drop): +@pytest.mark.parametrize('as_array', [True, False], ids=['cupy', 'cudf']) +def test_onehot_inverse_transform(drop, as_array): X = DataFrame({'g': ['M', 'F', 'F'], 'i': [1, 3, 2]}) + if as_array: + X = _from_df_to_cupy(X) + drop = _convert_drop(drop) enc = OneHotEncoder(drop=drop) ohe = enc.fit_transform(X) inv = enc.inverse_transform(ohe) - assert_frame_equal(inv.to_pandas(), X.to_pandas()) + assert_inverse_equal(inv, X) -def test_onehot_categories(): +@pytest.mark.parametrize('as_array', [True, False], ids=['cupy', 'cudf']) +def test_onehot_categories(as_array): X = DataFrame({'chars': ['a', 'b'], 'int': [0, 2]}) - enc = OneHotEncoder( - categories=DataFrame({'chars': ['a', 'b', 'c'], 'int': [0, 1, 2]}), - sparse=False - ) + categories = DataFrame({'chars': ['a', 'b', 'c'], 'int': [0, 1, 2]}) + if as_array: + X = _from_df_to_cupy(X) + categories = _from_df_to_cupy(categories).transpose() + + enc = OneHotEncoder(categories=categories, sparse=False) ref = cp.array([[1., 0., 0., 1., 0., 0.], [0., 1., 0., 0., 0., 1.]]) res = enc.fit_transform(X) cp.testing.assert_array_equal(res, ref) -def test_onehot_fit_handle_unknown(): +@pytest.mark.parametrize('as_array', [True, False], ids=['cupy', 'cudf']) +def test_onehot_fit_handle_unknown(as_array): X = DataFrame({'chars': ['a', 'b'], 
'int': [0, 2]}) Y = DataFrame({'chars': ['c', 'b'], 'int': [0, 2]}) + if as_array: + X = _from_df_to_cupy(X) + Y = _from_df_to_cupy(Y) enc = OneHotEncoder(handle_unknown='error', categories=Y) with pytest.raises(KeyError): @@ -93,9 +139,13 @@ def test_onehot_fit_handle_unknown(): enc.fit(X) -def test_onehot_transform_handle_unknown(): +@pytest.mark.parametrize('as_array', [True, False], ids=['cupy', 'cudf']) +def test_onehot_transform_handle_unknown(as_array): X = DataFrame({'chars': ['a', 'b'], 'int': [0, 2]}) Y = DataFrame({'chars': ['c', 'b'], 'int': [0, 2]}) + if as_array: + X = _from_df_to_cupy(X) + Y = _from_df_to_cupy(Y) enc = OneHotEncoder(handle_unknown='error', sparse=False) enc = enc.fit(X) @@ -110,42 +160,50 @@ def test_onehot_transform_handle_unknown(): cp.testing.assert_array_equal(ohe, ref) -def test_onehot_inverse_transform_handle_unknown(): +@pytest.mark.parametrize('as_array', [True, False], ids=['cupy', 'cudf']) +def test_onehot_inverse_transform_handle_unknown(as_array): X = DataFrame({'chars': ['a', 'b'], 'int': [0, 2]}) Y_ohe = cp.array([[0., 0., 1., 0.], [0., 1., 0., 1.]]) + ref = DataFrame({'chars': [None, 'b'], 'int': [0, 2]}) + if as_array: + X = _from_df_to_cupy(X) + ref = DataFrame({0: [None, ord('b')], 1: [0, 2]}) enc = OneHotEncoder(handle_unknown='ignore') enc = enc.fit(X) df = enc.inverse_transform(Y_ohe) - ref = DataFrame({'chars': [None, 'b'], 'int': [0, 2]}) - assert_frame_equal(df.to_pandas(), ref.to_pandas()) + assert_inverse_equal(df, ref) @pytest.mark.parametrize('drop', [None, 'first']) @pytest.mark.parametrize('sparse', [True, False], ids=['sparse', 'dense']) @pytest.mark.parametrize("n_samples", [10, 1000, 20000, stress_param(250000)]) -def test_onehot_random_inputs(drop, sparse, n_samples): - df, ary = _generate_inputs_from_categories(n_samples=n_samples) +@pytest.mark.parametrize('as_array', [True, False], ids=['cupy', 'cudf']) +def test_onehot_random_inputs(drop, sparse, n_samples, as_array): + X, ary = 
_generate_inputs_from_categories(n_samples=n_samples, + as_array=as_array) enc = OneHotEncoder(sparse=sparse, drop=drop) sk_enc = SkOneHotEncoder(sparse=sparse, drop=drop) - ohe = enc.fit_transform(df) + ohe = enc.fit_transform(X) ref = sk_enc.fit_transform(ary) if sparse: cp.testing.assert_array_equal(ohe.toarray(), ref.toarray()) else: cp.testing.assert_array_equal(ohe, ref) - inv_ohe = enc.inverse_transform(ohe) + assert_inverse_equal(inv_ohe, X) - assert_frame_equal(inv_ohe.to_pandas(), df.to_pandas()) - -def test_onehot_drop_idx_first(): +@pytest.mark.parametrize('as_array', [True, False], ids=['cupy', 'cudf']) +def test_onehot_drop_idx_first(as_array): X_ary = [['c', 2, 'a'], ['b', 2, 'b']] X = DataFrame({'chars': ['c', 'b'], 'int': [2, 2], 'letters': ['a', 'b']}) + if as_array: + X = _from_df_to_cupy(X) + X_ary = cp.asnumpy(X) enc = OneHotEncoder(sparse=False, drop='first') sk_enc = SkOneHotEncoder(sparse=False, drop='first') @@ -153,18 +211,27 @@ def test_onehot_drop_idx_first(): ref = sk_enc.fit_transform(X_ary) cp.testing.assert_array_equal(ohe, ref) inv = enc.inverse_transform(ohe) - assert_frame_equal(inv.to_pandas(), X.to_pandas()) + assert_inverse_equal(inv, X) -def test_onehot_drop_one_of_each(): +@pytest.mark.parametrize('as_array', [True, False], ids=['cupy', 'cudf']) +def test_onehot_drop_one_of_each(as_array): X = DataFrame({'chars': ['c', 'b'], 'int': [2, 2], 'letters': ['a', 'b']}) drop = dict({'chars': 'b', 'int': 2, 'letters': 'b'}) + X_ary = _from_df_to_array(X) + drop_ary = ['b', 2, 'b'] + if as_array: + X = _from_df_to_cupy(X) + X_ary = cp.asnumpy(X) + drop = drop_ary = _convert_drop(drop) + enc = OneHotEncoder(sparse=False, drop=drop) ohe = enc.fit_transform(X) - ref = SkOneHotEncoder(sparse=False, drop=['b', 2, 'b']).fit_transform(X) + print(ohe.dtype) + ref = SkOneHotEncoder(sparse=False, drop=drop_ary).fit_transform(X_ary) cp.testing.assert_array_equal(ohe, ref) inv = enc.inverse_transform(ohe) - assert_frame_equal(inv.to_pandas(), 
X.to_pandas()) + assert_inverse_equal(inv, X) @pytest.mark.parametrize("drop, pattern", @@ -173,33 +240,46 @@ def test_onehot_drop_one_of_each(): [dict({'chars': 'b', 'int': [2, 0]}), 'Trying to drop multiple values'], [dict({'chars': 'b', 'int': 3}), - 'Some categories [a-zA-Z, ]* were not found'], + 'Some categories [0-9a-zA-Z, ]* were not found'], [DataFrame({'chars': 'b', 'int': 3}), 'Wrong input for parameter `drop`.']]) -def test_onehot_drop_exceptions(drop, pattern): +@pytest.mark.parametrize('as_array', [True, False], ids=['cupy', 'cudf']) +def test_onehot_drop_exceptions(drop, pattern, as_array): X = DataFrame({'chars': ['c', 'b', 'd'], 'int': [2, 1, 0]}) + if as_array: + X = _from_df_to_cupy(X) + drop = _convert_drop(drop) if not isinstance(drop, DataFrame) else drop with pytest.raises(ValueError, match=pattern): OneHotEncoder(sparse=False, drop=drop).fit(X) -def test_onehot_get_categories(): +@pytest.mark.parametrize('as_array', [True, False], ids=['cupy', 'cudf']) +def test_onehot_get_categories(as_array): X = DataFrame({'chars': ['c', 'b', 'd'], 'ints': [2, 1, 0]}) - ref = [np.array(['b', 'c', 'd']), np.array([0, 1, 2])] + if as_array: + X = _from_df_to_cupy(X) + ref[0] = np.array([ord(x) for x in ref[0]]) + enc = OneHotEncoder().fit(X) - cats = enc.get_categories_() + cats = enc.categories_ for i in range(len(ref)): - np.testing.assert_array_equal(ref[i], cats[i]) + cp.testing.assert_array_equal(ref[i], cats[i]) -def test_onehot_sparse_drop(): +@pytest.mark.parametrize('as_array', [True, False], ids=['cupy', 'cudf']) +def test_onehot_sparse_drop(as_array): X = DataFrame({'g': ['M', 'F', 'F'], 'i': [1, 3, 2], 'l': [5, 5, 6]}) drop = {'g': 'F', 'i': 3, 'l': 6} ary = _from_df_to_array(X) drop_ary = ['F', 3, 6] + if as_array: + X = _from_df_to_cupy(X) + ary = cp.asnumpy(X) + drop = drop_ary = _convert_drop(drop) enc = OneHotEncoder(sparse=True, drop=drop) sk_enc = SkOneHotEncoder(sparse=True, drop=drop_ary) From 58615972f6fa04aef08adf45aab618c21ec7dda9 
Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Mon, 20 Apr 2020 19:20:40 -0500 Subject: [PATCH 259/330] FIX use the correct numba checker function --- python/cuml/common/base.pyx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/cuml/common/base.pyx b/python/cuml/common/base.pyx index cb169ea102..6a16b9f344 100644 --- a/python/cuml/common/base.pyx +++ b/python/cuml/common/base.pyx @@ -27,7 +27,7 @@ import inspect from cudf.core import Series, DataFrame from cuml.common.array import CumlArray from cupy import ndarray as cupyArray -from numba.cuda import is_cuda_array +from numba.cuda import devicearray as numbaArray from numpy import ndarray as numpyArray @@ -301,7 +301,7 @@ def _input_to_type(input): # numba check for a numba device_array if type(input) in _input_type_to_str.keys(): return _input_type_to_str[type(input)] - elif is_cuda_array(input): + elif numbaArray.is_cuda_ndarray(input): return 'numba' else: return 'cupy' From ad13eab655b33439d4178de5f11664791006d70a Mon Sep 17 00:00:00 2001 From: Thejaswi Rao Date: Mon, 20 Apr 2020 22:52:53 -0700 Subject: [PATCH 260/330] FIX minor code fixes for contingency matrix prim unit-test --- cpp/test/prims/contingencyMatrix.cu | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/cpp/test/prims/contingencyMatrix.cu b/cpp/test/prims/contingencyMatrix.cu index 7bddf933b1..faa4d48bec 100644 --- a/cpp/test/prims/contingencyMatrix.cu +++ b/cpp/test/prims/contingencyMatrix.cu @@ -24,7 +24,7 @@ namespace MLCommon { namespace Metrics { -struct contingencyMatrixParam { +struct ContingencyMatrixParam { int nElements; int minClass; int maxClass; @@ -34,11 +34,11 @@ struct contingencyMatrixParam { }; template -class ContingencyMatrixTestImpl - : public ::testing::TestWithParam { +class ContingencyMatrixTest + : public ::testing::TestWithParam { protected: void SetUp() override { - params = ::testing::TestWithParam::GetParam(); + params = 
::testing::TestWithParam::GetParam(); int numElements = params.nElements; int lowerLabelRange = params.minClass; @@ -124,7 +124,7 @@ class ContingencyMatrixTestImpl if (pWorkspace) CUDA_CHECK(cudaFree(pWorkspace)); } - contingencyMatrixParam params; + ContingencyMatrixParam params; int numUniqueClasses = -1; T *dY = nullptr; T *dYHat = nullptr; @@ -135,7 +135,7 @@ class ContingencyMatrixTestImpl cudaStream_t stream; }; -const std::vector inputs = { +const std::vector inputs = { {10000, 1, 10, true, false, 0.000001}, {10000, 1, 5000, true, false, 0.000001}, {10000, 1, 10000, true, false, 0.000001}, @@ -146,14 +146,14 @@ const std::vector inputs = { {100000, 1, 100, false, true, 0.000001}, }; -typedef ContingencyMatrixTestImpl ContingencyMatrixTestImplS; -TEST_P(ContingencyMatrixTestImplS, Result) { +typedef ContingencyMatrixTest ContingencyMatrixTestS; +TEST_P(ContingencyMatrixTestS, Result) { ASSERT_TRUE(devArrMatch(dComputedOutput, dGoldenOutput, numUniqueClasses * numUniqueClasses, CompareApprox(params.tolerance))); } -INSTANTIATE_TEST_CASE_P(ContingencyMatrix, ContingencyMatrixTestImplS, +INSTANTIATE_TEST_CASE_P(ContingencyMatrix, ContingencyMatrixTestS, ::testing::ValuesIn(inputs)); } // namespace Metrics } // namespace MLCommon From efa369c8333dabc6fa930262be6c9fb85e304a1f Mon Sep 17 00:00:00 2001 From: Thejaswi Rao Date: Mon, 20 Apr 2020 22:53:54 -0700 Subject: [PATCH 261/330] FIX add stream-sync before the test teardown --- cpp/test/prims/contingencyMatrix.cu | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/test/prims/contingencyMatrix.cu b/cpp/test/prims/contingencyMatrix.cu index faa4d48bec..35b9d830f8 100644 --- a/cpp/test/prims/contingencyMatrix.cu +++ b/cpp/test/prims/contingencyMatrix.cu @@ -115,6 +115,7 @@ class ContingencyMatrixTest } void TearDown() override { + CUDA_CHECK(cudaStreamSynchronize(stream)); free(hGoldenOutput); CUDA_CHECK(cudaStreamDestroy(stream)); CUDA_CHECK(cudaFree(dY)); From f99ede81c6ad1ae7f6dcd69f290396981738f147 Mon Sep 17 
00:00:00 2001 From: Thejaswi Rao Date: Mon, 20 Apr 2020 23:27:31 -0700 Subject: [PATCH 262/330] ENH use integer log2 for counting number of bits to sort --- cpp/src_prims/metrics/contingencyMatrix.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/cpp/src_prims/metrics/contingencyMatrix.h b/cpp/src_prims/metrics/contingencyMatrix.h index a9d630de70..d7ad4b7b1e 100644 --- a/cpp/src_prims/metrics/contingencyMatrix.h +++ b/cpp/src_prims/metrics/contingencyMatrix.h @@ -109,7 +109,8 @@ void contingencyMatrixWSort(const T *groundTruth, const T *predictedLabel, T *outValue = reinterpret_cast((size_t)workspace + alignedBufferSz); void *pWorkspaceCub = reinterpret_cast((size_t)workspace + 2 * alignedBufferSz); - int bitsToSort = int(std::ceil(std::log2f((float)maxLabel))); + auto bitsToSort = log2(maxLabel); + if (!isPo2(maxLabel)) ++bitsToSort; // we dont really need perfect sorting, should get by with some sort of // binning-reordering operation ///@todo: future work - explore "efficient" custom binning kernels vs cub sort @@ -125,14 +126,12 @@ template ContingencyMatrixImplType getImplVersion(OutT outDimN) { int currDevice = 0; int l2CacheSize = 0; - int maxSmemPerBlock = 0; // no way to query this from CUDA APIs, value for CC 7.0, 3.0 int maxBlocksResidentPerSM = 16; CUDA_CHECK(cudaGetDevice(&currDevice)); CUDA_CHECK( cudaDeviceGetAttribute(&l2CacheSize, cudaDevAttrL2CacheSize, currDevice)); - CUDA_CHECK(cudaDeviceGetAttribute( - &maxSmemPerBlock, cudaDevAttrMaxSharedMemoryPerBlock, currDevice)); + auto maxSmemPerBlock = getSharedMemPerBlock(); ContingencyMatrixImplType implVersion = IMPL_NONE; // keeping 8 block per SM to get good utilization // can go higher but reduced L1 size degrades perf From 1b10469cd07d05f74d4949161c019462b32a4c4e Mon Sep 17 00:00:00 2001 From: Thejaswi Rao Date: Mon, 20 Apr 2020 23:51:28 -0700 Subject: [PATCH 263/330] ENH added more unit-tests for contingency-matrix --- cpp/test/prims/contingencyMatrix.cu | 4 ++++ 1 file 
changed, 4 insertions(+) diff --git a/cpp/test/prims/contingencyMatrix.cu b/cpp/test/prims/contingencyMatrix.cu index 35b9d830f8..24c5dd39a7 100644 --- a/cpp/test/prims/contingencyMatrix.cu +++ b/cpp/test/prims/contingencyMatrix.cu @@ -141,6 +141,10 @@ const std::vector inputs = { {10000, 1, 5000, true, false, 0.000001}, {10000, 1, 10000, true, false, 0.000001}, {10000, 1, 20000, true, false, 0.000001}, + {10000, 1, 10, false, false, 0.000001}, + {10000, 1, 5000, false, false, 0.000001}, + {10000, 1, 10000, false, false, 0.000001}, + {10000, 1, 20000, false, false, 0.000001}, {100000, 1, 100, false, false, 0.000001}, {1000000, 1, 1200, true, false, 0.000001}, {1000000, 1, 10000, false, false, 0.000001}, From e9eb02639576cfcca3b13e1139f152cebf7924a6 Mon Sep 17 00:00:00 2001 From: Thejaswi Rao Date: Mon, 20 Apr 2020 23:59:34 -0700 Subject: [PATCH 264/330] FIX issue with unit-test itself while computing the class cardinality --- cpp/test/prims/contingencyMatrix.cu | 58 ++++++++++++++--------------- 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/cpp/test/prims/contingencyMatrix.cu b/cpp/test/prims/contingencyMatrix.cu index 24c5dd39a7..c1d6b3a6c9 100644 --- a/cpp/test/prims/contingencyMatrix.cu +++ b/cpp/test/prims/contingencyMatrix.cu @@ -70,48 +70,48 @@ class ContingencyMatrixTest std::replace(y_hat.begin(), y_hat.end(), y2, y2_R); } - numUniqueClasses = upperLabelRange - lowerLabelRange + 1; + CUDA_CHECK(cudaStreamCreate(&stream)); + MLCommon::allocate(dY, numElements); + MLCommon::allocate(dYHat, numElements); + + MLCommon::updateDevice(dYHat, &y_hat[0], numElements, stream); + MLCommon::updateDevice(dY, &y[0], numElements, stream); + + T minLabel, maxLabel; + if (params.calcCardinality) { + MLCommon::Metrics::getInputClassCardinality(dY, numElements, stream, + minLabel, maxLabel); + } else { + minLabel = lowerLabelRange; + maxLabel = upperLabelRange; + } + + numUniqueClasses = maxLabel - minLabel + 1; + + MLCommon::allocate(dComputedOutput, 
numUniqueClasses * numUniqueClasses); + MLCommon::allocate(dGoldenOutput, numUniqueClasses * numUniqueClasses); // generate golden output on CPU size_t sizeOfMat = numUniqueClasses * numUniqueClasses * sizeof(int); - int *hGoldenOutput = (int *)malloc(sizeOfMat); + hGoldenOutput = (int *)malloc(sizeOfMat); memset(hGoldenOutput, 0, sizeOfMat); for (int i = 0; i < numElements; i++) { - int row = y[i] - lowerLabelRange; - int column = y_hat[i] - lowerLabelRange; - + auto row = y[i] - minLabel; + auto column = y_hat[i] - minLabel; hGoldenOutput[row * numUniqueClasses + column] += 1; } - CUDA_CHECK(cudaStreamCreate(&stream)); - MLCommon::allocate(dY, numElements); - MLCommon::allocate(dYHat, numElements); - MLCommon::allocate(dComputedOutput, numUniqueClasses * numUniqueClasses); - MLCommon::allocate(dGoldenOutput, numUniqueClasses * numUniqueClasses); + MLCommon::updateDevice(dGoldenOutput, hGoldenOutput, + numUniqueClasses * numUniqueClasses, stream); size_t workspaceSz = MLCommon::Metrics::getContingencyMatrixWorkspaceSize( - numElements, dY, stream, lowerLabelRange, upperLabelRange); - + numElements, dY, stream, minLabel, maxLabel); if (workspaceSz != 0) MLCommon::allocate(pWorkspace, workspaceSz); - MLCommon::updateDevice(dYHat, &y_hat[0], numElements, stream); - MLCommon::updateDevice(dY, &y[0], numElements, stream); - MLCommon::updateDevice(dGoldenOutput, hGoldenOutput, - numUniqueClasses * numUniqueClasses, stream); - - if (params.calcCardinality) { - T minLabel, maxLabel; - MLCommon::Metrics::getInputClassCardinality(dY, numElements, stream, - minLabel, maxLabel); - // allocate dComputedOutput using minLabel, maxLabel count - already done above - MLCommon::Metrics::contingencyMatrix( - dY, dYHat, numElements, dComputedOutput, stream, (void *)pWorkspace, - workspaceSz, minLabel, maxLabel); - } else - MLCommon::Metrics::contingencyMatrix( - dY, dYHat, numElements, dComputedOutput, stream, (void *)pWorkspace, - workspaceSz, lowerLabelRange, upperLabelRange); + 
MLCommon::Metrics::contingencyMatrix( + dY, dYHat, numElements, dComputedOutput, stream, (void *)pWorkspace, + workspaceSz, minLabel, maxLabel); } void TearDown() override { From fa4b2ccfeaaa95190eb9e5ae5b806d5146183d69 Mon Sep 17 00:00:00 2001 From: Thejaswi Rao Date: Tue, 21 Apr 2020 00:04:19 -0700 Subject: [PATCH 265/330] ENH minor refactor to the contingency matrix unit-test to reduce code duplication --- cpp/test/prims/contingencyMatrix.cu | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/cpp/test/prims/contingencyMatrix.cu b/cpp/test/prims/contingencyMatrix.cu index c1d6b3a6c9..01423bec39 100644 --- a/cpp/test/prims/contingencyMatrix.cu +++ b/cpp/test/prims/contingencyMatrix.cu @@ -77,7 +77,6 @@ class ContingencyMatrixTest MLCommon::updateDevice(dYHat, &y_hat[0], numElements, stream); MLCommon::updateDevice(dY, &y[0], numElements, stream); - T minLabel, maxLabel; if (params.calcCardinality) { MLCommon::Metrics::getInputClassCardinality(dY, numElements, stream, minLabel, maxLabel); @@ -105,13 +104,9 @@ class ContingencyMatrixTest MLCommon::updateDevice(dGoldenOutput, hGoldenOutput, numUniqueClasses * numUniqueClasses, stream); - size_t workspaceSz = MLCommon::Metrics::getContingencyMatrixWorkspaceSize( + workspaceSz = MLCommon::Metrics::getContingencyMatrixWorkspaceSize( numElements, dY, stream, minLabel, maxLabel); if (workspaceSz != 0) MLCommon::allocate(pWorkspace, workspaceSz); - - MLCommon::Metrics::contingencyMatrix( - dY, dYHat, numElements, dComputedOutput, stream, (void *)pWorkspace, - workspaceSz, minLabel, maxLabel); } void TearDown() override { @@ -125,15 +120,27 @@ class ContingencyMatrixTest if (pWorkspace) CUDA_CHECK(cudaFree(pWorkspace)); } + void RunTest() { + int numElements = params.nElements; + MLCommon::Metrics::contingencyMatrix( + dY, dYHat, numElements, dComputedOutput, stream, (void *)pWorkspace, + workspaceSz, minLabel, maxLabel); + ASSERT_TRUE( + devArrMatch(dComputedOutput, dGoldenOutput, 
+ numUniqueClasses * numUniqueClasses, Compare())); + } + ContingencyMatrixParam params; int numUniqueClasses = -1; T *dY = nullptr; T *dYHat = nullptr; + T minLabel, maxLabel; int *dComputedOutput = nullptr; int *dGoldenOutput = nullptr; int *hGoldenOutput = nullptr; char *pWorkspace = nullptr; cudaStream_t stream; + size_t workspaceSz; }; const std::vector inputs = { @@ -152,12 +159,7 @@ const std::vector inputs = { }; typedef ContingencyMatrixTest ContingencyMatrixTestS; -TEST_P(ContingencyMatrixTestS, Result) { - ASSERT_TRUE(devArrMatch(dComputedOutput, dGoldenOutput, - numUniqueClasses * numUniqueClasses, - CompareApprox(params.tolerance))); -} - +TEST_P(ContingencyMatrixTestS, Result) { RunTest(); } INSTANTIATE_TEST_CASE_P(ContingencyMatrix, ContingencyMatrixTestS, ::testing::ValuesIn(inputs)); } // namespace Metrics From cc0d604169605eea6c46cf07c1408ffb06e74acf Mon Sep 17 00:00:00 2001 From: Thejaswi Rao Date: Tue, 21 Apr 2020 00:08:24 -0700 Subject: [PATCH 266/330] DOC update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3b8d0b6667..7db955203e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -46,6 +46,7 @@ - PR #2078: Ignore negative cache indices in get_vecs - PR #2084: Fixed cuda-memcheck errors with COO unit-tests - PR #2087: Fixed cuda-memcheck errors with dispersion prim +- PR #2115: Fixed contingency matrix prim unit-tests for computing correct golden values # cuML 0.13.0 (Date TBD) From 0eb6f5c6e593dae426707c314f62873ed09a9b39 Mon Sep 17 00:00:00 2001 From: Thejaswi Rao Date: Tue, 21 Apr 2020 00:09:37 -0700 Subject: [PATCH 267/330] DOC copyright year update --- cpp/src_prims/metrics/contingencyMatrix.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src_prims/metrics/contingencyMatrix.h b/cpp/src_prims/metrics/contingencyMatrix.h index d7ad4b7b1e..0a2029bd09 100644 --- a/cpp/src_prims/metrics/contingencyMatrix.h +++ b/cpp/src_prims/metrics/contingencyMatrix.h @@ 
-1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2019-2020, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. From 2ad7ab859517dd492c0dc6f798fa3eee49fb466d Mon Sep 17 00:00:00 2001 From: Louis Sugy Date: Tue, 21 Apr 2020 12:47:21 +0200 Subject: [PATCH 268/330] ARIMA output_type rework --- python/cuml/test/test_arima.py | 14 +++++----- python/cuml/tsa/arima.pyx | 51 ++++++++++++++++++++++------------ 2 files changed, 40 insertions(+), 25 deletions(-) diff --git a/python/cuml/test/test_arima.py b/python/cuml/test/test_arima.py index c108a96893..09fa8241af 100644 --- a/python/cuml/test/test_arima.py +++ b/python/cuml/test/test_arima.py @@ -239,12 +239,12 @@ def test_integration(test_case, dtype): ref_fits = get_ref_fit(data, order, seasonal_order, intercept, dtype) # Create and fit cuML model - cuml_model = arima.ARIMA( - y_cudf, order, seasonal_order, fit_intercept=intercept) + cuml_model = arima.ARIMA(y_cudf, order, seasonal_order, + fit_intercept=intercept, output_type='numpy') cuml_model.fit() # Predict - cuml_pred = cuml_model.predict(data.start, data.end).to_output('numpy') + cuml_pred = cuml_model.predict(data.start, data.end) ref_preds = np.zeros((data.end - data.start, data.batch_size)) for i in range(data.batch_size): ref_preds[:, i] = ref_fits[i].get_prediction( @@ -286,8 +286,8 @@ def _predict_common(test_case, dtype, start, end, num_steps=None): ref_fits = get_ref_fit(data, order, seasonal_order, intercept, dtype) # Create cuML model - cuml_model = arima.ARIMA( - y_cudf, order, seasonal_order, fit_intercept=intercept) + cuml_model = arima.ARIMA(y_cudf, order, seasonal_order, + fit_intercept=intercept, output_type='numpy') # Feed the parameters to the cuML model _statsmodels_to_cuml(ref_fits, cuml_model, order, seasonal_order, @@ -299,9 +299,9 @@ def _predict_common(test_case, dtype, start, end, num_steps=None): ref_preds[:, 
i] = ref_fits[i].get_prediction( start, end - 1).predicted_mean if num_steps is None: - cuml_pred = cuml_model.predict(start, end).to_output('numpy') + cuml_pred = cuml_model.predict(start, end) else: - cuml_pred = cuml_model.forecast(num_steps).to_output('numpy') + cuml_pred = cuml_model.forecast(num_steps) # Compare results np.testing.assert_allclose(cuml_pred, ref_preds, rtol=0.001, atol=0.01) diff --git a/python/cuml/tsa/arima.pyx b/python/cuml/tsa/arima.pyx index 9c65cc2acb..8c4f6e4505 100644 --- a/python/cuml/tsa/arima.pyx +++ b/python/cuml/tsa/arima.pyx @@ -119,7 +119,7 @@ class ARIMA(Base): model.fit() # Forecast - fc = model.forecast(10).to_output('numpy') + fc = model.forecast(10) print(fc) Output: @@ -152,6 +152,13 @@ class ARIMA(Base): Leave to None for automatic selection based on the model order handle: cuml.Handle If it is None, a new one is created just for this instance + verbose: int (optional, default 0) + Controls verbosity level of logging. + output_type : {'input', 'cudf', 'cupy', 'numpy'}, optional + Variable to control output type of the results and attributes of + the estimators. If None, it'll inherit the output type set at the + module level, cuml.output_type. If set, the estimator will override + the global option for its behavior. Attributes ---------- @@ -200,14 +207,20 @@ class ARIMA(Base): seasonal_order: Tuple[int, int, int, int] = (0, 0, 0, 0), fit_intercept=None, - handle=None): + handle=None, + verbose=0, + output_type=None): if not has_scipy(): raise RuntimeError("Scipy is needed to run cuML's ARIMA estimator." 
" Please install it to enable ARIMA " "estimation.") - super().__init__(handle) + # Initialize base class + super().__init__(handle, verbose, output_type) + self._set_output_type(y) + + # Set the ARIMA order cdef ARIMAOrder cpp_order cpp_order.p, cpp_order.d, cpp_order.q = order cpp_order.P, cpp_order.D, cpp_order.Q, cpp_order.s = seasonal_order @@ -215,7 +228,6 @@ class ARIMA(Base): # by default, use an intercept only with non differenced models fit_intercept = (order[1] + seasonal_order[1] == 0) cpp_order.k = int(fit_intercept) - self.order = cpp_order # Check validity of the ARIMA order and seasonal order @@ -239,7 +251,7 @@ class ARIMA(Base): raise ValueError("ERROR: Invalid order. Required: p,q,P,Q <= 4") # Get device array. Float64 only for now. - self.d_y, self.n_obs, self.batch_size, self.dtype \ + self._d_y, self.n_obs, self.batch_size, self.dtype \ = input_to_cuml_array(y, check_dtype=np.float64) if self.n_obs < d + s * D + 1: @@ -249,12 +261,15 @@ class ARIMA(Base): self.niter = None # number of iterations used during fit def __str__(self): - if self.seasonal_order[3]: - return "Batched ARIMA{}{}_{}".format(self.order, - self.seasonal_order[:3], - self.seasonal_order[3]) + cdef ARIMAOrder order = self.order + intercept_str = 'c' if order.k else 'n' + if order.s: + return "ARIMA({},{},{})({},{},{})_{} ({}) - {} series".format( + order.p, order.d, order.q, order.P, order.D, order.Q, order.s, + intercept_str, self.batch_size) else: - return "Batched ARIMA{}".format(self.order) + return "ARIMA({},{},{}) ({}) - {} series".format( + order.p, order.d, order.q, intercept_str, self.batch_size) @nvtx_range_wrap def _ic(self, ic_type: str): @@ -299,7 +314,7 @@ class ARIMA(Base): cdef vector[double] ic ic.resize(self.batch_size) - cdef uintptr_t d_y_ptr = self.d_y.ptr + cdef uintptr_t d_y_ptr = self._d_y.ptr ic_name_to_number = {"aic": 0, "aicc": 1, "bic": 2} cdef int ic_type_id @@ -397,7 +412,7 @@ class ARIMA(Base): ... 
model = ARIMA(ys, (1,1,1)) model.fit() - y_pred = model.predict().to_output('numpy') + y_pred = model.predict() """ cdef ARIMAOrder order = self.order @@ -463,13 +478,13 @@ class ARIMA(Base): d_vs_ptr = d_vs.ptr d_y_p_ptr = d_y_p.ptr - cdef uintptr_t d_y_ptr = self.d_y.ptr + cdef uintptr_t d_y_ptr = self._d_y.ptr cpp_predict(handle_[0], d_y_ptr, self.batch_size, self.n_obs, start, end, order, cpp_params, d_vs_ptr, d_y_p_ptr) - return d_y_p + return d_y_p.to_output(self.output_type) @nvtx_range_wrap def forecast(self, nsteps: int): @@ -492,7 +507,7 @@ class ARIMA(Base): ... model = ARIMA(ys, (1,1,1)) model.fit() - y_fc = model.forecast(10).copy_to_host() + y_fc = model.forecast(10) """ return self.predict(self.n_obs, self.n_obs + nsteps) @@ -503,7 +518,7 @@ class ARIMA(Base): """ cdef ARIMAOrder order = self.order - cdef uintptr_t d_y_ptr = self.d_y.ptr + cdef uintptr_t d_y_ptr = self._d_y.ptr cdef cumlHandle* handle_ = self.handle.getHandle() # Create mu, ar and ma arrays @@ -597,7 +612,7 @@ class ARIMA(Base): else: self.set_params(start_params) - cdef uintptr_t d_y_ptr = self.d_y.ptr + cdef uintptr_t d_y_ptr = self._d_y.ptr def f(x: np.ndarray) -> np.ndarray: """The (batched) energy functional returning the negative @@ -658,7 +673,7 @@ class ARIMA(Base): input_to_cuml_array(x, check_dtype=np.float64, order='C') cdef uintptr_t d_x_ptr = d_x_array.ptr - cdef uintptr_t d_y_ptr = self.d_y.ptr + cdef uintptr_t d_y_ptr = self._d_y.ptr cdef cumlHandle* handle_ = self.handle.getHandle() d_vs = cumlArray.empty((self.n_obs - order.d - order.D * order.s, From 6dabc05d8f57dd1cfee3bed898066f799643b493 Mon Sep 17 00:00:00 2001 From: Louis Sugy Date: Tue, 21 Apr 2020 13:54:27 +0200 Subject: [PATCH 269/330] Update ARIMA notebook --- notebooks/arima_demo.ipynb | 23 +++-------------------- 1 file changed, 3 insertions(+), 20 deletions(-) diff --git a/notebooks/arima_demo.ipynb b/notebooks/arima_demo.ipynb index 84c2e3837e..1268bbf59d 100644 --- a/notebooks/arima_demo.ipynb +++ 
b/notebooks/arima_demo.ipynb @@ -108,7 +108,6 @@ " \n", " # Range for the prediction\n", " if pred is not None:\n", - " pred_np = pred.to_output('numpy')\n", " pred_start = pred_start or n_obs\n", " pred_end = pred_start + pred.shape[0]\n", " \n", @@ -117,8 +116,9 @@ " title = y.columns[i]\n", " ax[i].plot(np.r_[:n_obs], y[title].to_array())\n", " if pred is not None:\n", - " ax[i].plot(np.r_[pred_start:pred_end], pred_np[:, i],\n", - " linestyle=\"--\")\n", + " ax[i].plot(np.r_[pred_start:pred_end],\n", + " pred[pred.columns[i]].to_array(),\n", + " linestyle=\"--\")\n", " ax[i].title.set_text(title)\n", " for i in range(batch_size, r*c):\n", " fig.delaxes(ax[i])\n", @@ -189,23 +189,6 @@ "visualize(df_mig, fc_mig)" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "*Note:* the returned array is a cuML array. You can convert it to other types with the `to_output` method:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(type(fc_mig))\n", - "print(type(fc_mig.to_output('numpy')))" - ] - }, { "cell_type": "markdown", "metadata": {}, From cbed4f316bf2ff36005f95ca977102818e3395fc Mon Sep 17 00:00:00 2001 From: "Corey J. 
Nolet" Date: Tue, 21 Apr 2020 11:47:33 -0400 Subject: [PATCH 270/330] Updating sgd solver & mini-batched estimators to use cuml array --- python/cuml/linear_model/mbsgd_classifier.pyx | 18 +++- python/cuml/linear_model/mbsgd_regressor.pyx | 18 +++- python/cuml/solvers/sgd.pyx | 85 +++++++++++-------- python/cuml/test/test_mbsgd_classifier.py | 4 +- python/cuml/test/test_mbsgd_regressor.py | 4 +- 5 files changed, 82 insertions(+), 47 deletions(-) diff --git a/python/cuml/linear_model/mbsgd_classifier.pyx b/python/cuml/linear_model/mbsgd_classifier.pyx index 418b50b96b..9e3512edc7 100644 --- a/python/cuml/linear_model/mbsgd_classifier.pyx +++ b/python/cuml/linear_model/mbsgd_classifier.pyx @@ -115,6 +115,11 @@ class MBSGDClassifier(Base): The old learning rate is generally divided by 5 n_iter_no_change : int (default = 5) the number of epochs to train without any imporvement in the model + output_type : {'input', 'cudf', 'cupy', 'numpy'}, optional + Variable to control output type of the results and attributes of + the estimators. If None, it'll inherit the output type set at the + module level, cuml.output_type. If set, the estimator will override + the global option for its behavior. Notes ------ @@ -126,8 +131,9 @@ class MBSGDClassifier(Base): l1_ratio=0.15, fit_intercept=True, epochs=1000, tol=1e-3, shuffle=True, learning_rate='constant', eta0=0.001, power_t=0.5, batch_size=32, n_iter_no_change=5, handle=None, - verbose=False): - super(MBSGDClassifier, self).__init__(handle=handle, verbose=verbose) + verbose=False, output_type=None): + super(MBSGDClassifier, self).__init__(handle=handle, verbose=verbose, + output_type=output_type) self.loss = loss self.penalty = penalty self.alpha = alpha @@ -169,6 +175,8 @@ class MBSGDClassifier(Base): self.coef_ = self.cu_mbsgd_classifier.coef_ self.intercept_ = self.cu_mbsgd_classifier.intercept_ + return self + def predict(self, X, convert_dtype=False): """ Predicts the y for X. 
@@ -187,14 +195,16 @@ class MBSGDClassifier(Base): Returns ---------- - y: cuDF DataFrame + y: Type specified by `output_type` Dense vector (floats or doubles) of shape (n_samples, 1) """ - return \ + preds = \ self.cu_mbsgd_classifier.predictClass(X, convert_dtype=convert_dtype) + return preds + def get_params(self, deep=True): """ Scikit-learn style function that returns the estimator parameters. diff --git a/python/cuml/linear_model/mbsgd_regressor.pyx b/python/cuml/linear_model/mbsgd_regressor.pyx index 14110371f8..1b15bf31cc 100644 --- a/python/cuml/linear_model/mbsgd_regressor.pyx +++ b/python/cuml/linear_model/mbsgd_regressor.pyx @@ -106,6 +106,11 @@ class MBSGDRegressor(Base): The old learning rate is generally divided by 5 n_iter_no_change : int (default = 5) the number of epochs to train without any imporvement in the model + output_type : {'input', 'cudf', 'cupy', 'numpy'}, optional + Variable to control output type of the results and attributes of + the estimators. If None, it'll inherit the output type set at the + module level, cuml.output_type. If set, the estimator will override + the global option for its behavior. Notes ------ @@ -117,8 +122,9 @@ class MBSGDRegressor(Base): l1_ratio=0.15, fit_intercept=True, epochs=1000, tol=1e-3, shuffle=True, learning_rate='constant', eta0=0.001, power_t=0.5, batch_size=32, n_iter_no_change=5, handle=None, - verbose=False): - super(MBSGDRegressor, self).__init__(handle=handle, verbose=verbose) + verbose=False, output_type=None): + super(MBSGDRegressor, self).__init__(handle=handle, verbose=verbose, + output_type=output_type) if loss in ['squared_loss']: self.loss = loss else: @@ -165,6 +171,8 @@ class MBSGDRegressor(Base): self.coef_ = self.cu_mbsgd_classifier.coef_ self.intercept_ = self.cu_mbsgd_classifier.intercept_ + return self + def predict(self, X, convert_dtype=False): """ Predicts the y for X. 
@@ -183,11 +191,13 @@ class MBSGDRegressor(Base): Returns ---------- - y: cuDF DataFrame + y: Type specified by `output_type` Dense vector (floats or doubles) of shape (n_samples, 1) """ - return self.cu_mbsgd_classifier.predict(X, convert_dtype=convert_dtype) + preds = self.cu_mbsgd_classifier.predict(X, + convert_dtype=convert_dtype) + return preds def get_params(self, deep=True): """ diff --git a/python/cuml/solvers/sgd.pyx b/python/cuml/solvers/sgd.pyx index 4984962803..1abd431f24 100644 --- a/python/cuml/solvers/sgd.pyx +++ b/python/cuml/solvers/sgd.pyx @@ -29,9 +29,10 @@ from libc.stdint cimport uintptr_t from libc.stdlib cimport calloc, malloc, free from cuml.common.base import Base +from cuml.common import CumlArray from cuml.common.handle cimport cumlHandle from cuml.utils import get_cudf_column_ptr, get_dev_array_ptr, \ - input_to_dev_array, zeros + input_to_cuml_array, zeros cdef extern from "cuml/solvers/solver.hpp" namespace "ML::Solver": @@ -200,6 +201,11 @@ class SGD(Base): The old learning rate is generally divide by 5 n_iter_no_change : int (default = 5) the number of epochs to train without any imporvement in the model + output_type : {'input', 'cudf', 'cupy', 'numpy'}, optional + Variable to control output type of the results and attributes of + the estimators. If None, it'll inherit the output type set at the + module level, cuml.output_type. If set, the estimator will override + the global option for its behavior. 
Notes ------ @@ -210,7 +216,8 @@ class SGD(Base): def __init__(self, loss='squared_loss', penalty='none', alpha=0.0001, l1_ratio=0.15, fit_intercept=True, epochs=1000, tol=1e-3, shuffle=True, learning_rate='constant', eta0=0.001, - power_t=0.5, batch_size=32, n_iter_no_change=5, handle=None): + power_t=0.5, batch_size=32, n_iter_no_change=5, handle=None, + output_type=None): if loss in ['hinge', 'log', 'squared_loss']: self.loss = self._get_loss_int(loss) @@ -224,7 +231,8 @@ class SGD(Base): msg = "penalty {!r} is not supported" raise TypeError(msg.format(penalty)) - super(SGD, self).__init__(handle=handle, verbose=False) + super(SGD, self).__init__(handle=handle, verbose=False, + output_type=output_type) self.alpha = alpha self.l1_ratio = l1_ratio self.fit_intercept = fit_intercept @@ -312,21 +320,23 @@ class SGD(Base): will increase memory used for the method. """ - cdef uintptr_t X_ptr, y_ptr - X_m, X_ptr, n_rows, self.n_cols, self.dtype = \ - input_to_dev_array(X, check_dtype=[np.float32, np.float64]) + X_m, n_rows, self.n_cols, self.dtype = \ + input_to_cuml_array(X, check_dtype=[np.float32, np.float64]) - y_m, y_ptr, _, _, _ = \ - input_to_dev_array(y, check_dtype=self.dtype, - convert_to_dtype=(self.dtype if convert_dtype - else None), - check_rows=n_rows, check_cols=1) + y_m, _, _, _ = \ + input_to_cuml_array(y, check_dtype=self.dtype, + convert_to_dtype=(self.dtype if convert_dtype + else None), + check_rows=n_rows, check_cols=1) + + cdef uintptr_t X_ptr = X_m.ptr + cdef uintptr_t y_ptr = y_m.ptr self.n_alpha = 1 - self.coef_ = cudf.Series(zeros(self.n_cols, - dtype=self.dtype)) - cdef uintptr_t coef_ptr = get_cudf_column_ptr(self.coef_) + self.coef_ = CumlArray.zeros(self.n_cols, + dtype=self.dtype) + cdef uintptr_t coef_ptr = self.coef_.ptr cdef float c_intercept1 cdef double c_intercept2 @@ -403,20 +413,21 @@ class SGD(Base): will increase memory used for the method. 
Returns ---------- - y: cuDF DataFrame + y: Type specified in `output_type` Dense vector (floats or doubles) of shape (n_samples, 1) """ - cdef uintptr_t X_ptr - X_m, X_ptr, n_rows, n_cols, self.dtype = \ - input_to_dev_array(X, check_dtype=self.dtype, - convert_to_dtype=(self.dtype if convert_dtype - else None), - check_cols=self.n_cols) + X_m, n_rows, n_cols, self.dtype = \ + input_to_cuml_array(X, check_dtype=self.dtype, + convert_to_dtype=(self.dtype if convert_dtype + else None), + check_cols=self.n_cols) + + cdef uintptr_t X_ptr = X_m.ptr cdef uintptr_t coef_ptr = get_cudf_column_ptr(self.coef_) - preds = cudf.Series(zeros(n_rows, dtype=self.dtype)) - cdef uintptr_t preds_ptr = get_cudf_column_ptr(preds) + preds = CumlArray.zeros(n_rows, dtype=self.dtype) + cdef uintptr_t preds_ptr = preds.ptr cdef cumlHandle* handle_ = self.handle.getHandle() @@ -443,7 +454,9 @@ class SGD(Base): del(X_m) - return preds + output_type = self._get_output_type(self.output_type) + + return preds.to_output(output_type) def predictClass(self, X, convert_dtype=False): """ @@ -463,20 +476,20 @@ class SGD(Base): Returns ---------- - y : cuDF DataFrame + y : Type specified in `output_type` Dense vector (floats or doubles) of shape (n_samples, 1) """ - cdef uintptr_t X_ptr - X_m, X_ptr, n_rows, n_cols, dtype = \ - input_to_dev_array(X, check_dtype=self.dtype, - convert_to_dtype=(self.dtype if convert_dtype - else None), - check_cols=self.n_cols) + X_m, n_rows, n_cols, dtype = \ + input_to_cuml_array(X, check_dtype=self.dtype, + convert_to_dtype=(self.dtype if convert_dtype + else None), + check_cols=self.n_cols) - cdef uintptr_t coef_ptr = get_cudf_column_ptr(self.coef_) - preds = cudf.Series(zeros(n_rows, dtype=dtype)) - cdef uintptr_t preds_ptr = get_cudf_column_ptr(preds) + cdef uintptr_t X_ptr = X_m.ptr + cdef uintptr_t coef_ptr = self.coef_.ptr + preds = CumlArray.zeros(n_rows, dtype=dtype) + cdef uintptr_t preds_ptr = preds.ptr cdef cumlHandle* handle_ = self.handle.getHandle() if 
dtype.type == np.float32: @@ -502,4 +515,6 @@ class SGD(Base): del(X_m) - return preds + output_type = self._get_output_type(self.output_type) + + return preds.to_output(output_type) diff --git a/python/cuml/test/test_mbsgd_classifier.py b/python/cuml/test/test_mbsgd_classifier.py index d5fde1bbfd..8de734b2fc 100644 --- a/python/cuml/test/test_mbsgd_classifier.py +++ b/python/cuml/test/test_mbsgd_classifier.py @@ -65,7 +65,7 @@ def test_mbsgd_classifier(lrate, penalty, loss, make_dataset): penalty=penalty) cu_mbsgd_classifier.fit(X_train, y_train) - cu_pred = cu_mbsgd_classifier.predict(X_test).to_array() + cu_pred = cu_mbsgd_classifier.predict(X_test).get() cu_acc = accuracy_score(cu_pred, y_test) if nrows < 500000: @@ -86,7 +86,7 @@ def test_mbsgd_classifier_default(make_dataset): cu_mbsgd_classifier = cumlMBSGClassifier() cu_mbsgd_classifier.fit(X_train, y_train) - cu_pred = cu_mbsgd_classifier.predict(X_test).to_array() + cu_pred = cu_mbsgd_classifier.predict(X_test).get() cu_acc = accuracy_score(cu_pred, y_test) if nrows < 500000: diff --git a/python/cuml/test/test_mbsgd_regressor.py b/python/cuml/test/test_mbsgd_regressor.py index 67404909fe..c21279fff2 100644 --- a/python/cuml/test/test_mbsgd_regressor.py +++ b/python/cuml/test/test_mbsgd_regressor.py @@ -65,7 +65,7 @@ def test_mbsgd_regressor(lrate, penalty, make_dataset): penalty=penalty) cu_mbsgd_regressor.fit(X_train, y_train) - cu_pred = cu_mbsgd_regressor.predict(X_test).to_array() + cu_pred = cu_mbsgd_regressor.predict(X_test).get() cu_r2 = r2_score(cu_pred, y_test, convert_dtype=datatype) if nrows < 500000: @@ -85,7 +85,7 @@ def test_mbsgd_regressor_default(make_dataset): cu_mbsgd_regressor = cumlMBSGRegressor() cu_mbsgd_regressor.fit(X_train, y_train) - cu_pred = cu_mbsgd_regressor.predict(X_test).to_array() + cu_pred = cu_mbsgd_regressor.predict(X_test).get() cu_r2 = r2_score(cu_pred, y_test, convert_dtype=datatype) if nrows < 500000: From 1365806916b5d1ae4c55edc0da53ba7676c244cd Mon Sep 17 
00:00:00 2001 From: "Corey J. Nolet" Date: Tue, 21 Apr 2020 12:15:39 -0400 Subject: [PATCH 271/330] Updating changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3b8d0b6667..41ecd09660 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -30,6 +30,7 @@ - PR #2080: Improved import of sparse FIL forests from treelite - PR #2090: Upgrade C++ build to C++14 standard - PR #2089: CI: enabled cuda-memcheck on ml-prims unit-tests during nightly build +- PR #2118: Updating SGD & mini-batch estimators to use CumlArray ## Bug Fixes - PR #1939: Fix syntax error in cuml.common.array From 0d7904f4a13b681766a3a9ae3440aafe4199d7d1 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Tue, 21 Apr 2020 12:18:55 -0400 Subject: [PATCH 272/330] Removing unused imports. Surprised flake8 didn't catch it --- python/cuml/solvers/sgd.pyx | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/python/cuml/solvers/sgd.pyx b/python/cuml/solvers/sgd.pyx index 1abd431f24..dde88acd38 100644 --- a/python/cuml/solvers/sgd.pyx +++ b/python/cuml/solvers/sgd.pyx @@ -31,8 +31,7 @@ from libc.stdlib cimport calloc, malloc, free from cuml.common.base import Base from cuml.common import CumlArray from cuml.common.handle cimport cumlHandle -from cuml.utils import get_cudf_column_ptr, get_dev_array_ptr, \ - input_to_cuml_array, zeros +from cuml.utils import input_to_cuml_array cdef extern from "cuml/solvers/solver.hpp" namespace "ML::Solver": From 782bc4f1a1a5d9b742ebd151ad6debf164e4f09f Mon Sep 17 00:00:00 2001 From: "Corey J. 
Nolet" Date: Tue, 21 Apr 2020 14:54:04 -0400 Subject: [PATCH 273/330] Updating sgd to use cumlarray ptr --- python/cuml/solvers/sgd.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cuml/solvers/sgd.pyx b/python/cuml/solvers/sgd.pyx index dde88acd38..8b4620feef 100644 --- a/python/cuml/solvers/sgd.pyx +++ b/python/cuml/solvers/sgd.pyx @@ -424,7 +424,7 @@ class SGD(Base): cdef uintptr_t X_ptr = X_m.ptr - cdef uintptr_t coef_ptr = get_cudf_column_ptr(self.coef_) + cdef uintptr_t coef_ptr = self.coef_.ptr preds = CumlArray.zeros(n_rows, dtype=self.dtype) cdef uintptr_t preds_ptr = preds.ptr From 301cfbd87caf06f7f25ac4670594be2a8a0640f9 Mon Sep 17 00:00:00 2001 From: divyegala Date: Tue, 21 Apr 2020 15:54:51 -0500 Subject: [PATCH 274/330] cupy low rank covariance --- python/cuml/dask/datasets/regression.py | 68 +++++++++++-------------- 1 file changed, 30 insertions(+), 38 deletions(-) diff --git a/python/cuml/dask/datasets/regression.py b/python/cuml/dask/datasets/regression.py index bb059686a3..5f8d71df4c 100644 --- a/python/cuml/dask/datasets/regression.py +++ b/python/cuml/dask/datasets/regression.py @@ -20,8 +20,9 @@ import numpy as np import cupy as cp from cuml.dask.common.part_utils import _extract_partitions -from cuml.datasets.regression import make_regression as sg_make_regression from cuml.utils import with_cupy_rmm +from cuml.dask.datasets.blobs import get_X +from cuml.dask.datasets.blobs import get_labels def create_rs_generator(random_state): @@ -76,6 +77,7 @@ def _data_from_multivariate_normal(seed, covar, n_samples, n_features, dtype): return local_rs.multivariate_normal(mean, covar, n_samples, dtype=dtype) + def data_from_multivariate_normal(client, rs, covar, chunksizes, n_features, dtype): workers = list(client.has_what().keys()) @@ -84,7 +86,6 @@ def data_from_multivariate_normal(client, rs, covar, chunksizes, n_features, chunks_workers = (workers * n_chunks)[:n_chunks] chunk_seeds = rs.permutation(len(chunksizes)) - 
covar = covar.compute() data_parts = [client.submit(_data_from_multivariate_normal, chunk_seeds[idx], covar, @@ -96,18 +97,10 @@ def data_from_multivariate_normal(client, rs, covar, chunksizes, n_features, data_dela = [da.from_delayed(dask.delayed(chunk), shape=(chunksizes[idx], n_features), meta=cp.zeros((1)), dtype=dtype) - for idx, chunk in enumerate(data_parts)] + for idx, chunk in enumerate(data_parts)] return da.concatenate(data_dela, axis=0) -def get_X(t): - return t[0] - - -def get_labels(t): - return t[1] - - def _f_order_shuffle(X, y, n_samples, seed, features_indices): local_rs = cp.random.RandomState(seed=seed) samples_indices = local_rs.permutation(n_samples) @@ -188,34 +181,31 @@ def generate_singular_values(n, effective_rank, tail_strength, tmp = sing_ind / effective_rank low_rank = (1 - tail_strength) * cp.exp(-1.0 * tmp ** 2) tail = tail_strength * cp.exp(-0.1 * tmp) - local_s = low_rank + tail - s = da.from_array(local_s, - chunks=(int(n_samples_per_part),)) + s = low_rank + tail return s -def make_low_rank_covariance(n_features, effective_rank, - tail_strength, random_state, n_parts, - n_samples_per_part, dtype): - - rs = create_rs_generator(random_state) - - m2 = rs.standard_normal((n_features, n_features), - chunks=(-1, generate_chunks_for_qr(n_features, - n_features, - n_parts)), - dtype=dtype) - v, _ = da.linalg.qr(m2) - - if n_samples_per_part is None: - n_samples_per_part = max(1, int(n_samples / n_parts)) - v = v.rechunk({0: n_samples_per_part, 1: -1}) +def _make_low_rank_covariance(n_features, effective_rank, + tail_strength, seed, n_parts, + n_samples_per_part, dtype): + local_rs = cp.random.RandomState(seed=seed) + m2 = local_rs.standard_normal((n_features, n_features), dtype=dtype) + v, _ = cp.linalg.qr(m2) s = generate_singular_values(n_features, effective_rank, tail_strength, n_samples_per_part) v *= (s ** 2) - return da.dot(v, da.transpose(v)) + return cp.dot(v, cp.transpose(v)) + + +def make_low_rank_covariance(client, n_features, 
effective_rank, + tail_strength, seed, n_parts, + n_samples_per_part, dtype): + + return client.submit(_make_low_rank_covariance, n_features, + effective_rank, tail_strength, seed, + n_parts, n_samples_per_part, dtype) def make_low_rank_matrix(n_samples=100, n_features=100, @@ -273,8 +263,10 @@ def make_low_rank_matrix(n_samples=100, n_features=100, u = u.rechunk({0: n_samples_per_part, 1: -1}) v = v.rechunk({0: n_samples_per_part, 1: -1}) - s = generate_singular_values(n, effective_rank, tail_strength, - n_samples_per_part) + local_s = generate_singular_values(n, effective_rank, tail_strength, + n_samples_per_part) + s = da.from_array(local_s, + chunks=(int(n_samples_per_part),)) u *= s return da.dot(u, v) @@ -358,7 +350,7 @@ def make_regression(n_samples=100, n_features=100, n_informative=10, we cannot generate order `F` by construction, and an explicit transpose is performed on each part. This may cause memory to spike (other parameters make order `F` by construction) - 2. When `n_targets > 3` and `order = 'F'` as above, we have to + 2. When `n_targets > 1` and `order = 'F'` as above, we have to explicity transpose the `y` array. If `coef = True`, then we also explicity transpose the `ground_truth` array 3. 
When `shuffle = True` and `order = F`, there are memory spikes to @@ -409,8 +401,9 @@ def make_regression(n_samples=100, n_features=100, n_informative=10, X = X.rechunk({0: data_chunksizes, 1: -1}) else: + seed = rs.randint(n_samples) covar = make_low_rank_covariance(n_features, effective_rank, - tail_strength, rs, n_parts, + tail_strength, seed, n_parts, n_samples_per_part, dtype) X = data_from_multivariate_normal(client, rs, covar, data_chunksizes, n_features, @@ -424,9 +417,8 @@ def make_regression(n_samples=100, n_features=100, n_informative=10, # zeros (the other features are not correlated to y and should be ignored # by a sparsifying regularizers such as L1 or elastic net) ground_truth = 100.0 * rs.standard_normal((n_informative, n_targets), - chunks=(n_samples_per_part, - -1), - dtype=dtype) + chunks=(n_samples_per_part, -1), + dtype=dtype) y = da.dot(X[:, :n_informative], ground_truth) + bias From cabcf74f665c6fc1a1c727431a1954e2a4ebcd0d Mon Sep 17 00:00:00 2001 From: divyegala Date: Tue, 21 Apr 2020 17:31:25 -0500 Subject: [PATCH 275/330] expanding on docs and tests --- python/cuml/dask/datasets/blobs.py | 14 ++-- python/cuml/dask/datasets/classification.py | 86 +++++++++++++------- python/cuml/datasets/classification.py | 53 +++++++++++- python/cuml/datasets/utils.py | 8 ++ python/cuml/test/dask/test_datasets.py | 11 +-- python/cuml/test/test_make_classification.py | 11 +-- 6 files changed, 133 insertions(+), 50 deletions(-) diff --git a/python/cuml/dask/datasets/blobs.py b/python/cuml/dask/datasets/blobs.py index 9261d560a3..a77bd36e50 100644 --- a/python/cuml/dask/datasets/blobs.py +++ b/python/cuml/dask/datasets/blobs.py @@ -50,16 +50,16 @@ def create_local_data(m, n, centers, cluster_std, random_state, return X, y -def get_meta(df): +def _get_meta(df): ret = df.iloc[:0] return ret -def get_X(t): +def _get_X(t): return t[0] -def get_labels(t): +def _get_labels(t): return t[1] @@ -163,19 +163,19 @@ def make_blobs(n_samples=100, n_features=2, 
centers=None, cluster_std=1.0, pure=False, workers=[worker])) - X = [client.submit(get_X, f, pure=False) + X = [client.submit(_get_X, f, pure=False) for idx, f in enumerate(parts)] - Y = [client.submit(get_labels, f, pure=False) + Y = [client.submit(_get_labels, f, pure=False) for idx, f in enumerate(parts)] if output == 'dataframe': - meta_X = client.submit(get_meta, X[0], pure=False) + meta_X = client.submit(_get_meta, X[0], pure=False) meta_X_local = meta_X.result() X_final = from_delayed([dask.delayed(x, pure=False) for x in X], meta=meta_X_local) - meta_y = client.submit(get_meta, Y[0], pure=False) + meta_y = client.submit(_get_meta, Y[0], pure=False) meta_y_local = meta_y.result() Y_final = from_delayed([dask.delayed(y, pure=False) for y in Y], meta=meta_y_local) diff --git a/python/cuml/dask/datasets/classification.py b/python/cuml/dask/datasets/classification.py index e1765afb05..ef2bc11b30 100644 --- a/python/cuml/dask/datasets/classification.py +++ b/python/cuml/dask/datasets/classification.py @@ -17,6 +17,8 @@ from cuml.datasets.classification import make_classification \ as sg_make_classification from cuml.datasets.utils import _create_rs_generator +from cuml.dask.datasets.blobs import _get_X +from cuml.dask.datasets.blobs import _get_labels from cuml.utils import with_cupy_rmm from dask.distributed import default_client @@ -28,14 +30,6 @@ import math -def get_X(t): - return t[0] - - -def get_labels(t): - return t[1] - - def _create_covariance(dims, seed, dtype='float32'): local_rs = cp.random.RandomState(seed=seed) return 2 * local_rs.rand(*dims, dtype=dtype) - 1 @@ -61,7 +55,42 @@ def make_classification(n_samples=100, n_features=20, n_informative=2, redundant features. The remaining features are filled with random noise. Thus, without shuffling, all useful features are contained in the columns ``X[:, :n_informative + n_redundant + n_repeated]``. - Read more in the :ref:`User Guide `. + + Examples + -------- + + .. 
code-block:: python + from dask.distributed import Client + from dask_cuda import LocalCUDACluster + from cuml.dask.datasets.classification import make_classification + cluster = LocalCUDACluster() + client = Client(cluster) + X, y = make_classification(n_samples=10, n_features=4, n_informative=2, n_classes=2) + + print("X:") + print(X.compute()) + + print("y:") + print(y.compute()) + + Output: + + .. code-block:: python + X: + [[-1.6990056 -0.8241044 -0.06997631 0.45107925] + [-1.8105277 1.7829906 0.492909 0.05390119] + [-0.18290454 -0.6155432 0.6667889 -1.0053712 ] + [-2.7530136 -0.888528 -0.5023055 1.3983376 ] + [-0.9788184 -0.89851004 0.10802134 -0.10021686] + [-0.76883423 -1.0689086 0.01249526 -0.1404741 ] + [-1.5676656 -0.83082974 -0.03072987 0.34499463] + [-0.9381793 -1.0971068 -0.07465998 0.02618019] + [-1.3021476 -0.87076336 0.02249984 0.15187258] + [ 1.1820307 1.7524253 1.5087451 -2.4626074 ]] + + y: + [0 1 0 0 0 0 0 0 0 1] + Parameters ---------- n_samples : int, optional (default=100) @@ -129,18 +158,23 @@ def make_classification(n_samples=100, n_features=20, n_informative=2, than the number of workers) Returns ------- - X : device array of shape [n_samples, n_features] + X : dask.array of shape [n_samples, n_features] The generated samples. - y : device array of shape [n_samples] + y : dask.array of shape [n_samples] The integer labels for class membership of each sample. - Notes - ----- - The algorithm is adapted from Guyon [1] and was designed to generate - the "Madelon" dataset. - References - ---------- - .. [1] I. Guyon, "Design of experiments for the NIPS 2003 variable - selection benchmark", 2003. + + How we extended the dask MNMG version from the single GPU version: + 1. We generate centroids of shape (n_centroids, n_informative) + 2. We generate an informative covariance of shape + (n_centroids, n_informative, n_informative) + 3. We generate a redundant covariance of shape + (n_informative, n_redundant) + 4. 
We generate the indices for the repeated features + We pass along the references to the futures of the above arrays + with each part to the single GPU + `cuml.datasets.classification.make_classification` so that each + part (and worker) has access to the correct values to generate + data from the same covariances """ client = default_client() if client is None else client @@ -159,16 +193,6 @@ def make_classification(n_samples=100, n_features=20, n_informative=2, centroids = cp.array(_generate_hypercube(n_clusters, n_informative, rs)).astype(dtype, copy=False) - # # create covariance matrices - # informative_covariance_local = rs.rand(n_clusters, n_informative, - # n_informative, dtype=dtype) - # informative_covariance = client.scatter(informative_covariance_local, - # workers=workers) - - # redundant_covariance_local = rs.rand(n_informative, n_redundant, - # dtype=dtype) - # redundant_covariance = client.scatter(redundant_covariance_local, - # workers=workers) covariance_seeds = rs.randint(n_features, size=2) informative_covariance = client.submit(_create_covariance, (n_clusters, n_informative, @@ -215,9 +239,9 @@ def make_classification(n_samples=100, n_features=20, n_informative=2, workers=[parts_workers[i]]) for i in range(len(parts_workers))] - X_parts = [client.submit(get_X, f, pure=False) + X_parts = [client.submit(_get_X, f, pure=False) for idx, f in enumerate(parts)] - y_parts = [client.submit(get_labels, f, pure=False) + y_parts = [client.submit(_get_labels, f, pure=False) for idx, f in enumerate(parts)] X_dela = [da.from_delayed(dask.delayed(Xp), diff --git a/python/cuml/datasets/classification.py b/python/cuml/datasets/classification.py index 141f5eef0a..8a69275ed1 100644 --- a/python/cuml/datasets/classification.py +++ b/python/cuml/datasets/classification.py @@ -59,7 +59,39 @@ def make_classification(n_samples=100, n_features=20, n_informative=2, redundant features. The remaining features are filled with random noise. 
Thus, without shuffling, all useful features are contained in the columns ``X[:, :n_informative + n_redundant + n_repeated]``. - Read more in the :ref:`User Guide `. + + Examples + -------- + + .. code-block:: python + from cuml.datasets.classification import make_classification + + X, y = make_classification(n_samples=10, n_features=4, n_informative=2, n_classes=2) + + print("X:") + print(X) + + print("y:") + print(y) + + Output: + + .. code-block:: python + X: + [[-2.3249989 -0.8679415 -1.1511791 1.3525577 ] + [ 2.2933831 1.3743551 0.63128835 -0.84648645] + [ 1.6361488 -1.3233329 0.807027 -0.894092 ] + [-1.0093077 -0.9990691 -0.00808992 0.00950443] + [ 0.99803793 2.068382 0.49570698 -0.8462848 ] + [-1.2750955 -0.9725835 -0.2390058 0.28081596] + [-1.3635055 -0.9637669 -0.31582272 0.37106958] + [ 1.1893625 2.227583 0.48750278 -0.8737561 ] + [-0.05753583 -1.0939395 0.8188342 -0.9620734 ] + [ 0.47910076 0.7648213 -0.17165393 0.26144698]] + + y: + [0 1 0 0 1 0 0 1 0 1] + Parameters ---------- n_samples : int, optional (default=100) @@ -143,6 +175,23 @@ def make_classification(n_samples=100, n_features=20, n_informative=2, ---------- .. [1] I. Guyon, "Design of experiments for the NIPS 2003 variable selection benchmark", 2003. + + How we optimized to the GPU: + 1. Firstly, we generate X from a standard univariate instead of zeros. + This saves memory as we don't need to generate univariates each + time for each feature class (informative, repeated, etc.) while + also providing the added speedup of generating a big matrix + on GPU + 2. We generate `order=F` construction. We exploit the + fact that X is a generated from a univariate normal, and + covariance is introduced with matrix multiplications. Which means, + we can generate X as a 1D array and just reshape it to the + desired order, which only updates the metadata and eliminates + copies + 3. Lastly, we also shuffle by construction. 
Centroid indices are + permuted for each sample, and then we construct the data for + each centroid. This shuffle works for both `order=C` and + `order=F` and eliminates any need for secondary copies """ generator = _create_rs_generator(random_state) @@ -182,7 +231,6 @@ def make_classification(n_samples=100, n_features=20, n_informative=2, n_samples_per_cluster[i % n_clusters] += 1 # Initialize X and y - # X = cp.zeros(n_samples * n_features, dtype=dtype, order=order) X = generator.randn(n_samples * n_features, dtype=dtype) X = X.reshape((n_samples, n_features), order=order) y = cp.zeros(n_samples, dtype=np.int) @@ -231,6 +279,7 @@ def make_classification(n_samples=100, n_features=20, n_informative=2, # NOTE: This could be done outside the loop, but a current # cupy bug does not allow that + # https://github.com/cupy/cupy/issues/3284 X[centroid_indices[0], n_informative:n_informative + n_redundant] = cp.dot(X_k, B) diff --git a/python/cuml/datasets/utils.py b/python/cuml/datasets/utils.py index c8bb5becc7..0a0b3c7b5e 100644 --- a/python/cuml/datasets/utils.py +++ b/python/cuml/datasets/utils.py @@ -17,6 +17,14 @@ def _create_rs_generator(random_state): + """ + This is a utility function that returns an instance of CuPy RandomState + Parameters + ---------- + random_state : None, int, or CuPy RandomState + The random_state from which the CuPy random state is generated + """ + if hasattr(random_state, '__module__'): rs_type = random_state.__module__ + '.' 
+ type(random_state).__name__ else: diff --git a/python/cuml/test/dask/test_datasets.py b/python/cuml/test/dask/test_datasets.py index 8adf019f0a..6e253b22bc 100644 --- a/python/cuml/test/dask/test_datasets.py +++ b/python/cuml/test/dask/test_datasets.py @@ -158,15 +158,16 @@ def test_make_regression(n_samples, n_features, n_informative, c.close() -@pytest.mark.parametrize('n_samples', [1000]) -@pytest.mark.parametrize('n_features', [100]) +@pytest.mark.parametrize('n_samples', [500, 1000]) +@pytest.mark.parametrize('n_features', [50, 100]) +@pytest.mark.parametrize('hypercube', [True, False]) @pytest.mark.parametrize('n_classes', [2, 4]) @pytest.mark.parametrize('n_clusters_per_class', [2, 4]) -@pytest.mark.parametrize('n_informative', [7]) +@pytest.mark.parametrize('n_informative', [7, 20]) @pytest.mark.parametrize('random_state', [None, 1234]) @pytest.mark.parametrize('n_parts', [2, 23]) @pytest.mark.parametrize('order', ['C', 'F']) -def test_make_classification(n_samples, n_features, n_classes, +def test_make_classification(n_samples, n_features, hypercube, n_classes, n_clusters_per_class, n_informative, random_state, n_parts, order, cluster): client = Client(cluster) @@ -174,7 +175,7 @@ def test_make_classification(n_samples, n_features, n_classes, from cuml.dask.datasets.classification import make_classification X, y = make_classification(n_samples=n_samples, n_features=n_features, - n_classes=n_classes, + n_classes=n_classes, hypercube=hypercube, n_clusters_per_class=n_clusters_per_class, n_informative=n_informative, random_state=random_state, n_parts=n_parts, diff --git a/python/cuml/test/test_make_classification.py b/python/cuml/test/test_make_classification.py index 1617e5a48f..7aed477f93 100644 --- a/python/cuml/test/test_make_classification.py +++ b/python/cuml/test/test_make_classification.py @@ -18,19 +18,20 @@ from cuml.datasets.classification import make_classification -@pytest.mark.parametrize('n_samples', [1000]) 
-@pytest.mark.parametrize('n_features', [100]) +@pytest.mark.parametrize('n_samples', [500, 1000]) +@pytest.mark.parametrize('n_features', [50, 100]) +@pytest.mark.parametrize('hypercube', [True, False]) @pytest.mark.parametrize('n_classes', [2, 4]) @pytest.mark.parametrize('n_clusters_per_class', [2, 4]) -@pytest.mark.parametrize('n_informative', [7]) +@pytest.mark.parametrize('n_informative', [7, 20]) @pytest.mark.parametrize('random_state', [None, 1234]) @pytest.mark.parametrize('order', ['C', 'F']) -def test_make_classification(n_samples, n_features, n_classes, +def test_make_classification(n_samples, n_features, hypercube, n_classes, n_clusters_per_class, n_informative, random_state, order): X, y = make_classification(n_samples=n_samples, n_features=n_features, - n_classes=n_classes, + n_classes=n_classes, hypercube=hypercube, n_clusters_per_class=n_clusters_per_class, n_informative=n_informative, random_state=random_state, order=order) From 9f4365b44bbae3633934d317b332fa35195b84e8 Mon Sep 17 00:00:00 2001 From: divyegala Date: Tue, 21 Apr 2020 17:39:45 -0500 Subject: [PATCH 276/330] style fixes --- python/cuml/dask/datasets/classification.py | 5 +++-- python/cuml/datasets/classification.py | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/python/cuml/dask/datasets/classification.py b/python/cuml/dask/datasets/classification.py index ef2bc11b30..0e3a0cad79 100644 --- a/python/cuml/dask/datasets/classification.py +++ b/python/cuml/dask/datasets/classification.py @@ -65,7 +65,8 @@ def make_classification(n_samples=100, n_features=20, n_informative=2, from cuml.dask.datasets.classification import make_classification cluster = LocalCUDACluster() client = Client(cluster) - X, y = make_classification(n_samples=10, n_features=4, n_informative=2, n_classes=2) + X, y = make_classification(n_samples=10, n_features=4, + n_informative=2, n_classes=2) print("X:") print(X.compute()) @@ -171,7 +172,7 @@ def make_classification(n_samples=100, 
n_features=20, n_informative=2, (n_informative, n_redundant) 4. We generate the indices for the repeated features We pass along the references to the futures of the above arrays - with each part to the single GPU + with each part to the single GPU `cuml.datasets.classification.make_classification` so that each part (and worker) has access to the correct values to generate data from the same covariances diff --git a/python/cuml/datasets/classification.py b/python/cuml/datasets/classification.py index 8a69275ed1..dd14d267e7 100644 --- a/python/cuml/datasets/classification.py +++ b/python/cuml/datasets/classification.py @@ -66,7 +66,8 @@ def make_classification(n_samples=100, n_features=20, n_informative=2, .. code-block:: python from cuml.datasets.classification import make_classification - X, y = make_classification(n_samples=10, n_features=4, n_informative=2, n_classes=2) + X, y = make_classification(n_samples=10, n_features=4, + n_informative=2, n_classes=2) print("X:") print(X) @@ -190,7 +191,7 @@ def make_classification(n_samples=100, n_features=20, n_informative=2, copies 3. Lastly, we also shuffle by construction. Centroid indices are permuted for each sample, and then we construct the data for - each centroid. This shuffle works for both `order=C` and + each centroid. 
This shuffle works for both `order=C` and `order=F` and eliminates any need for secondary copies """ generator = _create_rs_generator(random_state) From 036e5b486d2e3b935a22092e45538e256fe68541 Mon Sep 17 00:00:00 2001 From: divyegala Date: Tue, 21 Apr 2020 18:13:19 -0500 Subject: [PATCH 277/330] speeding up dask rf tests --- python/cuml/test/dask/test_random_forest.py | 56 ++------------------- 1 file changed, 5 insertions(+), 51 deletions(-) diff --git a/python/cuml/test/dask/test_random_forest.py b/python/cuml/test/dask/test_random_forest.py index ad09a9bdd9..7568cf9c8b 100644 --- a/python/cuml/test/dask/test_random_forest.py +++ b/python/cuml/test/dask/test_random_forest.py @@ -56,9 +56,7 @@ def _prep_training_data(c, X_train, y_train, partitions_per_worker): X_cudf = cudf.DataFrame.from_pandas(pd.DataFrame(X_train)) X_train_df = dask_cudf.from_cudf(X_cudf, npartitions=n_partitions) - y_cudf = np.array(pd.DataFrame(y_train).values) - y_cudf = y_cudf[:, 0] - y_cudf = cudf.Series(y_cudf) + y_cudf = cudf.Series(y_train) y_train_df = \ dask_cudf.from_cudf(y_cudf, npartitions=n_partitions) @@ -71,7 +69,7 @@ def _prep_training_data(c, X_train, y_train, partitions_per_worker): @pytest.mark.parametrize('partitions_per_worker', [1, 3]) -def test_rf_classification_dask(partitions_per_worker, cluster): +def test_rf_classification_dask_cudf(partitions_per_worker, cluster): # Use CUDA_VISIBLE_DEVICES to control the number of workers c = Client(cluster) @@ -108,46 +106,6 @@ def test_rf_classification_dask(partitions_per_worker, cluster): c.close() -@pytest.mark.parametrize('partitions_per_worker', [1, 3]) -def test_rf_classification_dask_cudf(partitions_per_worker, cluster): - - # Use CUDA_VISIBLE_DEVICES to control the number of workers - c = Client(cluster) - - try: - - X, y = make_classification(n_samples=10000, n_features=20, - n_clusters_per_class=1, n_informative=10, - random_state=123, n_classes=5) - - X = X.astype(np.float32) - y = y.astype(np.int32) - - X_train, 
X_test, y_train, y_test = \ - train_test_split(X, y, test_size=1000) - - cu_rf_params = { - 'n_estimators': 40, - 'max_depth': 16, - 'n_bins': 16, - } - - X_train_df, y_train_df = _prep_training_data(c, X_train, y_train, - partitions_per_worker) - - X_test_cudf = cudf.DataFrame.from_gpu_matrix(rmm.to_device(X_test)) - cu_rf_mg = cuRFC_mg(**cu_rf_params) - cu_rf_mg.fit(X_train_df, y_train_df) - cu_rf_mg_predict = cu_rf_mg.predict(X_test_cudf) - - acc_score = accuracy_score(cu_rf_mg_predict, y_test, normalize=True) - - assert acc_score > 0.8 - - finally: - c.close() - - @pytest.mark.xfail(reason="Intermittent failure of test observed. For" "more information please check cuml issue #1934") @pytest.mark.parametrize('partitions_per_worker', [1, 5]) @@ -180,9 +138,7 @@ def test_rf_regression_dask_fil(partitions_per_worker, cluster): X_train_df = \ dask_cudf.from_cudf(X_cudf, npartitions=n_partitions) - y_cudf = np.array(pd.DataFrame(y_train).values) - y_cudf = y_cudf[:, 0] - y_cudf = cudf.Series(y_cudf) + y_cudf = cudf.Series(y_train) y_train_df = \ dask_cudf.from_cudf(y_cudf, npartitions=n_partitions) X_cudf_test = cudf.DataFrame.from_pandas(pd.DataFrame(X_test)) @@ -303,7 +259,7 @@ def test_rf_regression_dask_cpu(partitions_per_worker, cluster): try: - X, y = make_regression(n_samples=100000, n_features=20, + X, y = make_regression(n_samples=10000, n_features=20, n_informative=10, random_state=123) X = X.astype(np.float32) @@ -325,9 +281,7 @@ def test_rf_regression_dask_cpu(partitions_per_worker, cluster): X_train_df = \ dask_cudf.from_cudf(X_cudf, npartitions=n_partitions) - y_cudf = np.array(pd.DataFrame(y_train).values) - y_cudf = y_cudf[:, 0] - y_cudf = cudf.Series(y_cudf) + y_cudf = cudf.Series(y_train) y_train_df = \ dask_cudf.from_cudf(y_cudf, npartitions=n_partitions) From 7bcf0ca2a0f21d32d87548aaa6f24903e5bca463 Mon Sep 17 00:00:00 2001 From: divyegala Date: Tue, 21 Apr 2020 18:16:52 -0500 Subject: [PATCH 278/330] changelog --- CHANGELOG.md | 1 + 1 file 
changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3b8d0b6667..31f46171a1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -30,6 +30,7 @@ - PR #2080: Improved import of sparse FIL forests from treelite - PR #2090: Upgrade C++ build to C++14 standard - PR #2089: CI: enabled cuda-memcheck on ml-prims unit-tests during nightly build +- PR #2120: Speeding up dask RandomForest tests ## Bug Fixes - PR #1939: Fix syntax error in cuml.common.array From af4fd3d632a842cdbe56c6be32bce5e1460c6778 Mon Sep 17 00:00:00 2001 From: divyegala Date: Tue, 21 Apr 2020 18:19:44 -0500 Subject: [PATCH 279/330] style fix --- python/cuml/test/dask/test_random_forest.py | 1 - 1 file changed, 1 deletion(-) diff --git a/python/cuml/test/dask/test_random_forest.py b/python/cuml/test/dask/test_random_forest.py index 7568cf9c8b..63ff71addc 100644 --- a/python/cuml/test/dask/test_random_forest.py +++ b/python/cuml/test/dask/test_random_forest.py @@ -34,7 +34,6 @@ import cupy as cp import dask_cudf import pytest -import rmm import numpy as np import pandas as pd From e9ebb9b3ce58f585c5e78993ad9e4c2f44d6808c Mon Sep 17 00:00:00 2001 From: Levs Dolgovs Date: Tue, 21 Apr 2020 16:34:48 -0700 Subject: [PATCH 280/330] added a many-class test; fixed too-many-classes error path depending on the GPU compute capability, as well as the configuration, the upper bound for classes can be approx. 12000, or up to 24000 as of now. However, the real limit is determined at runtime. 
--- cpp/src/fil/infer.cu | 2 +- cpp/test/sg/fil_test.cu | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/src/fil/infer.cu b/cpp/src/fil/infer.cu index bd73167786..79a64e37fe 100644 --- a/cpp/src/fil/infer.cu +++ b/cpp/src/fil/infer.cu @@ -315,7 +315,7 @@ void infer_k_launcher(storage_type forest, predict_params params, // given_num_cols is a random large int params.num_cols = params.max_shm / sizeof(float); // since we're crashing, this will not take too long - while (get_smem_footprint<1, leaf_payload_type>(params) > params.max_shm) { + while (params.num_cols > 0 && get_smem_footprint<1, leaf_payload_type>(params) > params.max_shm) { --params.num_cols; } ASSERT(false, "p.num_cols == %d: too many features, only %d allowed%s", diff --git a/cpp/test/sg/fil_test.cu b/cpp/test/sg/fil_test.cu index 99d8560d3e..2cd0c440ed 100644 --- a/cpp/test/sg/fil_test.cu +++ b/cpp/test/sg/fil_test.cu @@ -698,7 +698,7 @@ std::vector predict_sparse_inputs = { {20000, 50, 0.05, 8, 50, 0.05, fil::output_t(fil::output_t::AVG | fil::output_t::CLASS), 1.0, 0.5, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator(0), - fil::leaf_value_t::INT_CLASS_LABEL, 10}, + fil::leaf_value_t::INT_CLASS_LABEL, 5000}, {20000, 50, 0.05, 8, 50, 0.05, fil::output_t::RAW, 0, 0.5, fil::algo_t::NAIVE, 42, 2e-3f, tl::Operator(0), fil::leaf_value_t::INT_CLASS_LABEL, 6}, {20000, 50, 0.05, 8, 50, 0.05, From 5f791bd63b41e7c7cef653bd479f2a86e031374c Mon Sep 17 00:00:00 2001 From: Levs Dolgovs Date: Tue, 21 Apr 2020 16:46:42 -0700 Subject: [PATCH 281/330] fixed style --- cpp/src/fil/infer.cu | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cpp/src/fil/infer.cu b/cpp/src/fil/infer.cu index 79a64e37fe..3f6dd76472 100644 --- a/cpp/src/fil/infer.cu +++ b/cpp/src/fil/infer.cu @@ -315,7 +315,8 @@ void infer_k_launcher(storage_type forest, predict_params params, // given_num_cols is a random large int params.num_cols = params.max_shm / sizeof(float); // since we're crashing, this will not 
take too long - while (params.num_cols > 0 && get_smem_footprint<1, leaf_payload_type>(params) > params.max_shm) { + while (params.num_cols > 0 && + get_smem_footprint<1, leaf_payload_type>(params) > params.max_shm) { --params.num_cols; } ASSERT(false, "p.num_cols == %d: too many features, only %d allowed%s", From ab1fa4eb407c8c7957d249f8167eee5b9a4371b3 Mon Sep 17 00:00:00 2001 From: chaithyagr Date: Wed, 22 Apr 2020 10:06:49 +0200 Subject: [PATCH 282/330] Add changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5a2d4a434e..5f78673e46 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -30,6 +30,7 @@ - PR #2080: Improved import of sparse FIL forests from treelite - PR #2090: Upgrade C++ build to C++14 standard - PR #2089: CI: enabled cuda-memcheck on ml-prims unit-tests during nightly build +- PR #2098: Renaming .h to .cuh in decision_tree, glm, pca ## Bug Fixes - PR #1939: Fix syntax error in cuml.common.array From 76870bd39a54c8437e109bf6cbbb90fdcd07ecde Mon Sep 17 00:00:00 2001 From: chaithyagr Date: Wed, 22 Apr 2020 10:15:56 +0200 Subject: [PATCH 283/330] Fix Clang errors --- cpp/src/glm/qn/glm_base.cuh | 2 +- cpp/src/glm/qn/qn.cuh | 4 ++-- cpp/src/glm/qn/qn_solvers.cuh | 2 +- cpp/test/sg/quasi_newton.cu | 6 +++--- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/cpp/src/glm/qn/glm_base.cuh b/cpp/src/glm/qn/glm_base.cuh index 09076513d1..ce871a8ef4 100644 --- a/cpp/src/glm/qn/glm_base.cuh +++ b/cpp/src/glm/qn/glm_base.cuh @@ -16,8 +16,8 @@ #pragma once -#include #include +#include #include #include "cuda_utils.h" #include "linalg/add.h" diff --git a/cpp/src/glm/qn/qn.cuh b/cpp/src/glm/qn/qn.cuh index c44306d047..3b5fa38c57 100644 --- a/cpp/src/glm/qn/qn.cuh +++ b/cpp/src/glm/qn/qn.cuh @@ -15,14 +15,14 @@ */ #pragma once +#include +#include #include #include #include #include #include #include -#include -#include namespace ML { namespace GLM { diff --git a/cpp/src/glm/qn/qn_solvers.cuh 
b/cpp/src/glm/qn/qn_solvers.cuh index 4328b22ade..f00b486385 100644 --- a/cpp/src/glm/qn/qn_solvers.cuh +++ b/cpp/src/glm/qn/qn_solvers.cuh @@ -41,10 +41,10 @@ */ #include +#include #include #include #include -#include namespace ML { namespace GLM { diff --git a/cpp/test/sg/quasi_newton.cu b/cpp/test/sg/quasi_newton.cu index c15e41f431..b7c0875978 100644 --- a/cpp/test/sg/quasi_newton.cu +++ b/cpp/test/sg/quasi_newton.cu @@ -1,10 +1,10 @@ +#include +#include +#include #include #include #include #include -#include -#include -#include #include #include "test_utils.h" #include "utils.h" From fb0d41309e2303e521856506d7a1cab87a253618 Mon Sep 17 00:00:00 2001 From: wxbn Date: Wed, 22 Apr 2020 09:30:24 +0000 Subject: [PATCH 284/330] Datasets as fixtures --- python/cuml/test/test_random_forest.py | 181 ++++++++++--------------- 1 file changed, 70 insertions(+), 111 deletions(-) diff --git a/python/cuml/test/test_random_forest.py b/python/cuml/test/test_random_forest.py index 3cf0dd26c4..e0d8dc1ca6 100644 --- a/python/cuml/test/test_random_forest.py +++ b/python/cuml/test/test_random_forest.py @@ -35,24 +35,54 @@ from sklearn.model_selection import train_test_split -@pytest.mark.parametrize('nrows', [unit_param(500), quality_param(5000), - stress_param(500000)]) -@pytest.mark.parametrize('column_info', [unit_param([20, 10]), - quality_param([200, 100]), - stress_param([500, 350])]) +@pytest.fixture( + params=[ + unit_param({'n_samples': 500, 'n_features': 20, 'n_informative': 10}), + quality_param({'n_samples': 5000, 'n_features': 200, + 'n_informative': 100}), + stress_param({'n_samples': 500000, 'n_features': 500, + 'n_informative': 350}) + ]) +def dataset(request): + n_samples = request.param['n_samples'] + n_features = request.param['n_features'] + n_informative = request.param['n_informative'] + X, y = make_classification(n_samples=n_samples, n_features=n_features, + n_clusters_per_class=1, + n_informative=n_informative, + random_state=123, n_classes=2) + return X, y + 
+ +@pytest.fixture( + params=[ + unit_param({'n_samples': 1500, 'n_features': 80, 'n_informative': 40}), + quality_param({'n_samples': 5000, 'n_features': 200, + 'n_informative': 100}), + stress_param({'n_samples': 500000, 'n_features': 500, + 'n_informative': 350}) + ]) +def large_dataset(request): + n_samples = request.param['n_samples'] + n_features = request.param['n_features'] + n_informative = request.param['n_informative'] + X, y = make_classification(n_samples=n_samples, n_features=n_features, + n_clusters_per_class=1, + n_informative=n_informative, + random_state=123, n_classes=2) + return X, y + + @pytest.mark.parametrize('rows_sample', [unit_param(1.0), quality_param(0.90), stress_param(0.95)]) @pytest.mark.parametrize('datatype', [np.float32]) @pytest.mark.parametrize('split_algo', [0, 1]) @pytest.mark.parametrize('max_features', [1.0, 'auto', 'log2', 'sqrt']) -def test_rf_classification(datatype, split_algo, rows_sample, nrows, - column_info, max_features): +def test_rf_classification(dataset, datatype, split_algo, + rows_sample, max_features): use_handle = True - ncols, n_info = column_info - X, y = make_classification(n_samples=nrows, n_features=ncols, - n_clusters_per_class=1, n_informative=n_info, - random_state=123, n_classes=2) + X, y = dataset X = X.astype(datatype) y = y.astype(np.int32) X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8, @@ -77,7 +107,7 @@ def test_rf_classification(datatype, split_algo, rows_sample, nrows, fil_preds = np.reshape(fil_preds, np.shape(cu_preds)) cuml_acc = accuracy_score(y_test, cu_preds) fil_acc = accuracy_score(y_test, fil_preds) - if nrows < 500000: + if X.shape[0] < 500000: sk_model = skrfc(n_estimators=40, max_depth=16, min_samples_split=2, max_features=max_features, @@ -152,17 +182,9 @@ def test_rf_regression(datatype, split_algo, mode, column_info, @pytest.mark.parametrize('datatype', [np.float32]) -@pytest.mark.parametrize('column_info', [unit_param([20, 10]), - quality_param([200, 
100]), - stress_param([500, 350])]) -@pytest.mark.parametrize('nrows', [unit_param(2000), quality_param(25000), - stress_param(500000)]) -def test_rf_regression_default(datatype, column_info, nrows): +def test_rf_regression_default(large_dataset, datatype): - ncols, n_info = column_info - X, y = make_regression(n_samples=nrows, n_features=ncols, - n_informative=n_info, - random_state=123) + X, y = large_dataset X = X.astype(datatype) y = y.astype(datatype) X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8, @@ -189,7 +211,7 @@ def test_rf_regression_default(datatype, column_info, nrows): # Initialize, fit and predict using # sklearn's random forest regression model - if nrows < 500000: + if X.shape[0] < 500000: sk_model = skrfr(max_depth=16, random_state=10) sk_model.fit(X_train, y_train) sk_preds = sk_model.predict(X_test) @@ -202,17 +224,9 @@ def test_rf_regression_default(datatype, column_info, nrows): @pytest.mark.parametrize('datatype', [np.float32]) -@pytest.mark.parametrize('column_info', [unit_param([20, 10]), - quality_param([200, 100]), - stress_param([500, 350])]) -@pytest.mark.parametrize('nrows', [unit_param(250), quality_param(5000), - stress_param(500000)]) -def test_rf_classification_seed(datatype, column_info, nrows): +def test_rf_classification_seed(dataset, datatype): - ncols, n_info = column_info - X, y = make_classification(n_samples=nrows, n_features=ncols, - n_clusters_per_class=1, n_informative=n_info, - random_state=0, n_classes=2) + X, y = dataset X = X.astype(datatype) y = y.astype(np.int32) X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8, @@ -232,7 +246,7 @@ def test_rf_classification_seed(datatype, column_info, nrows): # sklearn random forest classification model # initialization, fit and predict - if nrows < 500000: + if X.shape[0] < 500000: sk_model = skrfc(max_depth=16, random_state=10) sk_model.fit(X_train, y_train) sk_preds = sk_model.predict(X_test) @@ -279,19 +293,10 @@ def 
test_rf_classification_seed(datatype, column_info, nrows): @pytest.mark.parametrize('datatype', [(np.float64, np.float32), (np.float32, np.float64)]) -@pytest.mark.parametrize('column_info', [unit_param([20, 10]), - quality_param([200, 100]), - stress_param([500, 350])]) -@pytest.mark.parametrize('nrows', [unit_param(500), quality_param(5000), - stress_param(500000)]) @pytest.mark.parametrize('convert_dtype', [True, False]) -def test_rf_classification_float64(datatype, column_info, - nrows, convert_dtype): +def test_rf_classification_float64(dataset, datatype, convert_dtype): - ncols, n_info = column_info - X, y = make_classification(n_samples=nrows, n_features=ncols, - n_clusters_per_class=1, n_informative=n_info, - random_state=0, n_classes=2) + X, y = dataset X = X.astype(datatype[0]) y = y.astype(np.int32) X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8, @@ -307,7 +312,7 @@ def test_rf_classification_float64(datatype, column_info, # sklearn random forest classification model # initialization, fit and predict - if nrows < 500000: + if X.shape[0] < 500000: sk_model = skrfc(max_depth=16, random_state=10) sk_model.fit(X_train, y_train) sk_preds = sk_model.predict(X_test) @@ -330,17 +335,9 @@ def test_rf_classification_float64(datatype, column_info, @pytest.mark.parametrize('datatype', [(np.float64, np.float32), (np.float32, np.float64)]) -@pytest.mark.parametrize('column_info', [unit_param([20, 10]), - quality_param([200, 100]), - stress_param([500, 350])]) -@pytest.mark.parametrize('nrows', [unit_param(1000), quality_param(25000), - stress_param(500000)]) -def test_rf_regression_float64(datatype, column_info, nrows): - ncols, n_info = column_info - X, y = make_regression(n_samples=nrows, n_features=ncols, - n_informative=n_info, - random_state=123) +def test_rf_regression_float64(large_dataset, datatype): + X, y = large_dataset X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8, random_state=0) X_train = 
X_train.astype(datatype[0]) @@ -357,7 +354,7 @@ def test_rf_regression_float64(datatype, column_info, nrows): # sklearn random forest classification model # initialization, fit and predict - if nrows < 500000: + if X.shape[0] < 500000: sk_model = skrfr(max_depth=16, random_state=10) sk_model.fit(X_train, y_train) sk_preds = sk_model.predict(X_test) @@ -425,25 +422,16 @@ def test_rf_classification_multi_class(datatype, column_info, nrows, assert cu_acc >= (sk_acc - 0.07) -@pytest.mark.parametrize('nrows', [unit_param(500), quality_param(5000), - stress_param(500000)]) -@pytest.mark.parametrize('column_info', [unit_param([20, 10]), - quality_param([200, 100]), - stress_param([500, 350])]) @pytest.mark.parametrize('datatype', [np.float32]) @pytest.mark.parametrize('fil_sparse_format', ['not_supported', True, 'auto', False]) @pytest.mark.parametrize('algo', ['auto', 'naive', 'tree_reorg', 'batch_tree_reorg']) -def test_rf_classification_sparse(datatype, nrows, column_info, - fil_sparse_format, algo): +def test_rf_classification_sparse(dataset, datatype, fil_sparse_format, algo): use_handle = True - ncols, n_info = column_info num_treees = 50 - X, y = make_classification(n_samples=nrows, n_features=ncols, - n_clusters_per_class=1, n_informative=n_info, - random_state=123, n_classes=2) + X, y = dataset X = X.astype(datatype) y = y.astype(np.int32) X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8, @@ -490,10 +478,10 @@ def test_rf_classification_sparse(datatype, nrows, column_info, tl_model = cuml_model.convert_to_treelite_model() assert num_treees == tl_model.num_trees - assert ncols == tl_model.num_features + assert X.shape[1] == tl_model.num_features del tl_model - if nrows < 500000: + if X.shape[0] < 500000: sk_model = skrfc(n_estimators=50, max_depth=40, min_samples_split=2, @@ -594,20 +582,13 @@ def test_rf_regression_sparse(datatype, mode, column_info, @pytest.mark.memleak @pytest.mark.parametrize('fil_sparse_format', [True, False, 'auto']) 
-@pytest.mark.parametrize('column_info', [unit_param([80, 40]), - quality_param([200, 100]), - stress_param([500, 350])]) -@pytest.mark.parametrize('nrows', [unit_param(800), quality_param(50000), - stress_param(500000)]) @pytest.mark.parametrize('n_iter', [unit_param(5), quality_param(30), stress_param(80)]) -def test_rf_memory_leakage(fil_sparse_format, column_info, nrows, n_iter): +def test_rf_memory_leakage(dataset, fil_sparse_format, n_iter): datatype = np.float32 use_handle = True - ncols, n_info = column_info - X, y = make_classification(n_samples=nrows, n_features=ncols, - n_clusters_per_class=1, n_informative=n_info, - random_state=0, n_classes=2) + + X, y = dataset X = X.astype(datatype) y = y.astype(np.int32) X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8, @@ -665,19 +646,12 @@ def test_create_classification_model(max_features, assert params['n_bins'] == verfiy_params['n_bins'] -@pytest.mark.parametrize('column_info', [unit_param([100, 50]), - quality_param([200, 100]), - stress_param([500, 350])]) -@pytest.mark.parametrize('nrows', [unit_param(500), quality_param(5000), - stress_param(500000)]) @pytest.mark.parametrize('n_estimators', [10, 20, 100]) @pytest.mark.parametrize('n_bins', [8, 9, 10]) -def test_multiple_fits_classification(column_info, - nrows, n_estimators, n_bins): +def test_multiple_fits_classification(large_dataset, n_estimators, n_bins): + datatype = np.float32 - ncols, n_info = column_info - X, y = make_classification(n_samples=nrows, n_features=ncols, - n_informative=n_info, n_classes=2) + X, y = large_dataset X = X.astype(datatype) y = y.astype(np.int32) cuml_model = curfc(n_bins=n_bins, @@ -695,19 +669,12 @@ def test_multiple_fits_classification(column_info, assert params['n_bins'] == n_bins -@pytest.mark.parametrize('column_info', [unit_param([100, 50]), - quality_param([200, 100]), - stress_param([500, 350])]) -@pytest.mark.parametrize('nrows', [unit_param(500), quality_param(5000), - 
stress_param(500000)]) @pytest.mark.parametrize('n_estimators', [10, 20, 100]) @pytest.mark.parametrize('n_bins', [8, 9, 10]) -def test_multiple_fits_regression(column_info, nrows, n_estimators, n_bins): +def test_multiple_fits_regression(large_dataset, n_estimators, n_bins): datatype = np.float32 - ncols, n_info = column_info - X, y = make_regression(n_samples=nrows, n_features=ncols, - n_informative=n_info, - random_state=123) + + X, y = large_dataset X = X.astype(datatype) y = y.astype(np.int32) cuml_model = curfr(n_bins=n_bins, @@ -727,22 +694,14 @@ def test_multiple_fits_regression(column_info, nrows, n_estimators, n_bins): assert params['n_bins'] == n_bins -@pytest.mark.parametrize('nrows', [unit_param(500), - stress_param(500000)]) -@pytest.mark.parametrize('column_info', [unit_param([20, 10]), - stress_param([500, 350])]) @pytest.mark.parametrize('rows_sample', [unit_param(1.0), stress_param(0.95)]) @pytest.mark.parametrize('datatype', [np.float32]) @pytest.mark.parametrize('max_features', [1.0, 'auto', 'log2', 'sqrt']) -def test_rf_classification_proba(datatype, rows_sample, nrows, - column_info, max_features): +def test_rf_classification_proba(dataset, datatype, rows_sample, max_features): use_handle = True - ncols, n_info = column_info - X, y = make_classification(n_samples=nrows, n_features=ncols, - n_clusters_per_class=1, n_informative=n_info, - random_state=123, n_classes=2) + X, y = dataset X = X.astype(datatype) y = y.astype(np.int32) X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8, @@ -766,7 +725,7 @@ def test_rf_classification_proba(datatype, rows_sample, nrows, y_proba[:, 1] = y_test y_proba[:, 0] = 1.0 - y_test fil_mse = mean_squared_error(y_proba, fil_preds_proba) - if nrows < 500000: + if X.shape[0] < 500000: sk_model = skrfc(n_estimators=40, max_depth=16, min_samples_split=2, max_features=max_features, From 2a43e8553c31e20a35c102340dee732485fd9b0f Mon Sep 17 00:00:00 2001 From: Simon Andersen Date: Wed, 22 Apr 2020 
11:32:08 -0500 Subject: [PATCH 285/330] Remove unnecessary conversion of CumlArray to Cupy --- python/cuml/metrics/confusion_matrix.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/python/cuml/metrics/confusion_matrix.py b/python/cuml/metrics/confusion_matrix.py index dbd2d8da13..d6662bec66 100644 --- a/python/cuml/metrics/confusion_matrix.py +++ b/python/cuml/metrics/confusion_matrix.py @@ -61,12 +61,10 @@ def confusion_matrix(y_true, y_pred, """ y_true, n_rows, n_cols, dtype = \ input_to_cuml_array(y_true, check_dtype=[cp.int32, cp.int64]) - y_true = y_true.to_output('cupy') y_pred, _, _, _ = \ input_to_cuml_array(y_pred, check_dtype=dtype, check_rows=n_rows, check_cols=n_cols) - y_pred = y_pred.to_output('cupy') if labels is None: labels = sorted_unique_labels(y_true, y_pred) From 76218a492ce5e7c3437f202db78b8e978a174e96 Mon Sep 17 00:00:00 2001 From: Simon Andersen Date: Wed, 22 Apr 2020 11:42:02 -0500 Subject: [PATCH 286/330] Fix dask documentation types and get_client typo --- python/cuml/dask/metrics/confusion_matrix.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/python/cuml/dask/metrics/confusion_matrix.py b/python/cuml/dask/metrics/confusion_matrix.py index 98389c87a0..89e7d0b4c4 100644 --- a/python/cuml/dask/metrics/confusion_matrix.py +++ b/python/cuml/dask/metrics/confusion_matrix.py @@ -59,17 +59,17 @@ def confusion_matrix(y_true, y_pred, Parameters ---------- - y_true : array-like (device or host) shape = (n_samples,) + y_true : dask.Array (device or host) shape = (n_samples,) or (n_samples, n_outputs) Ground truth (correct) target values. - y_pred : array-like (device or host) shape = (n_samples,) + y_pred : dask.Array (device or host) shape = (n_samples,) or (n_samples, n_outputs) Estimated target values. labels : array-like (device or host) shape = (n_classes,), optional List of labels to index the matrix. This may be used to reorder or select a subset of labels. 
If None is given, those that appear at least once in y_true or y_pred are used in sorted order. - sample_weight : array-like (device or host) shape = (n_samples,), optional + sample_weight : dask.Array (device or host) shape = (n_samples,), optional Sample weights. normalize : string in [‘true’, ‘pred’, ‘all’] Normalizes confusion matrix over the true (rows), predicted (columns) @@ -83,7 +83,7 @@ def confusion_matrix(y_true, y_pred, C : array-like (device or host) shape = (n_classes, n_classes) Confusion matrix. """ - client = get_client() + client = get_client(client) if labels is None: labels = sorted_unique_labels(y_true, y_pred) From 81ed57a80701064c0e227d257401b29715793af0 Mon Sep 17 00:00:00 2001 From: Simon Andersen Date: Wed, 22 Apr 2020 11:43:17 -0500 Subject: [PATCH 287/330] Remove unnecessary sorting since cp.unique already sorts --- python/cuml/dask/metrics/utils.py | 2 +- python/cuml/metrics/utils.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/python/cuml/dask/metrics/utils.py b/python/cuml/dask/metrics/utils.py index 355425b0f9..60df622845 100644 --- a/python/cuml/dask/metrics/utils.py +++ b/python/cuml/dask/metrics/utils.py @@ -28,4 +28,4 @@ def sorted_unique_labels(*ys): ys = (cp.unique(y.map_blocks(lambda x: cp.unique(x)).compute()) for y in ys) labels = cp.unique(cp.concatenate(ys)) - return cp.sort(labels) + return labels diff --git a/python/cuml/metrics/utils.py b/python/cuml/metrics/utils.py index 2d2fc5e833..bd865b1a53 100644 --- a/python/cuml/metrics/utils.py +++ b/python/cuml/metrics/utils.py @@ -27,4 +27,4 @@ def sorted_unique_labels(*ys): labels.""" ys = (cp.unique(y) for y in ys) labels = cp.unique(cp.concatenate(ys)) - return cp.sort(labels) + return labels From 282f56069690b623f04e005e518629c5ea637d72 Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Wed, 22 Apr 2020 12:11:23 -0500 Subject: [PATCH 288/330] FIX Small corrections from merge --- build.sh | 8 +++++--- python/setup.py | 6 ++++-- 2 files 
changed, 9 insertions(+), 5 deletions(-) diff --git a/build.sh b/build.sh index 3431c81c0d..10564f4285 100755 --- a/build.sh +++ b/build.sh @@ -119,11 +119,13 @@ if (( ${CLEAN} == 1 )); then rmdir ${bd} || true fi - cd ${REPODIR}/python - python setup.py clean --all - cd ${REPODIR} + done + + cd ${REPODIR}/python + python setup.py clean --all + cd ${REPODIR} fi ################################################################################ diff --git a/python/setup.py b/python/setup.py index 8f2c195440..ad614ece44 100644 --- a/python/setup.py +++ b/python/setup.py @@ -67,7 +67,6 @@ try: setup_file_path = str(Path(__file__).parent.absolute()) - shutil.rmtree(setup_file_path + '/build') shutil.rmtree(setup_file_path + '/.pytest_cache', ignore_errors=True) shutil.rmtree(setup_file_path + '/external_repositories', ignore_errors=True) @@ -75,6 +74,7 @@ shutil.rmtree(setup_file_path + '/__pycache__', ignore_errors=True) clean_folder(setup_file_path + '/cuml') + shutil.rmtree(setup_file_path + '/build') except IOError: pass @@ -82,7 +82,9 @@ # need to terminate script so cythonizing doesn't get triggered after # cleanup unintendedly sys.argv.remove("clean") - sys.argv.remove("--all") + + if "--all" in sys.argv: + sys.argv.remove("--all") if len(sys.argv) == 1: sys.exit(0) From 8c14013a9890900797507f1ae57b29b5607f4528 Mon Sep 17 00:00:00 2001 From: divyegala Date: Wed, 22 Apr 2020 15:59:32 -0500 Subject: [PATCH 289/330] review changes --- python/cuml/dask/datasets/regression.py | 96 ++++++++++++++----------- python/cuml/test/dask/test_datasets.py | 15 ++-- 2 files changed, 62 insertions(+), 49 deletions(-) diff --git a/python/cuml/dask/datasets/regression.py b/python/cuml/dask/datasets/regression.py index 5f8d71df4c..1eada78cac 100644 --- a/python/cuml/dask/datasets/regression.py +++ b/python/cuml/dask/datasets/regression.py @@ -19,10 +19,10 @@ from dask.distributed import default_client import numpy as np import cupy as cp -from cuml.dask.common.part_utils import 
_extract_partitions from cuml.utils import with_cupy_rmm from cuml.dask.datasets.blobs import get_X from cuml.dask.datasets.blobs import get_labels +from cuml.dask.common.input_utils import DistributedDataHandler def create_rs_generator(random_state): @@ -45,6 +45,11 @@ def create_rs_generator(random_state): return rs +def _dask_array_from_delayed(part, nrows, ncols, dtype): + return da.from_delayed(dask.delayed(part), shape=(nrows, ncols), + meta=cp.zeros((1)), dtype=dtype) + + def _f_order_standard_normal(nrows, ncols, dtype, seed): local_rs = cp.random.RandomState(seed=seed) x = local_rs.standard_normal(nrows * ncols, dtype=dtype) @@ -64,10 +69,10 @@ def f_order_standard_normal(client, rs, chunksizes, ncols, dtype): pure=False) for idx, chunksize in enumerate(chunksizes)] - chunks_dela = [da.from_delayed(dask.delayed(chunk), - shape=(chunksizes[idx], ncols), - meta=cp.zeros((1)), dtype=dtype) + chunks_dela = [_dask_array_from_delayed(chunk, chunksizes[idx], ncols, + dtype) for idx, chunk in enumerate(chunks)] + return da.concatenate(chunks_dela, axis=0) @@ -94,14 +99,15 @@ def data_from_multivariate_normal(client, rs, covar, chunksizes, n_features, pure=False) for idx, chunk in enumerate(chunksizes)] - data_dela = [da.from_delayed(dask.delayed(chunk), - shape=(chunksizes[idx], n_features), - meta=cp.zeros((1)), dtype=dtype) + data_dela = [_dask_array_from_delayed(chunk, chunksizes[idx], n_features, + dtype) for idx, chunk in enumerate(data_parts)] + return da.concatenate(data_dela, axis=0) -def _f_order_shuffle(X, y, n_samples, seed, features_indices): +def _f_order_shuffle(part, n_samples, seed, features_indices): + X, y = part[0], part[1] local_rs = cp.random.RandomState(seed=seed) samples_indices = local_rs.permutation(n_samples) @@ -114,42 +120,37 @@ def _f_order_shuffle(X, y, n_samples, seed, features_indices): def f_order_shuffle(client, rs, X, y, chunksizes, n_features, features_indices, n_targets, dtype): - X_parts = client.sync(_extract_partitions, X) - 
y_parts = client.sync(_extract_partitions, y) + data_ddh = DistributedDataHandler.create(data=(X, y), client=client) chunk_seeds = rs.permutation(len(chunksizes)) - shuffled = [client.submit(_f_order_shuffle, X_part, y_parts[idx][1], + shuffled = [client.submit(_f_order_shuffle, part, chunksizes[idx], chunk_seeds[idx], features_indices, workers=[w], pure=False) - for idx, (w, X_part) in enumerate(X_parts)] + for idx, (w, part) in enumerate(data_ddh.gpu_futures)] X_shuffled = [client.submit(get_X, f, pure=False) for idx, f in enumerate(shuffled)] y_shuffled = [client.submit(get_labels, f, pure=False) for idx, f in enumerate(shuffled)] - X_dela = [da.from_delayed(dask.delayed(Xs), - shape=(chunksizes[idx], n_features), - meta=cp.zeros((1)), - dtype=dtype) + X_dela = [_dask_array_from_delayed(Xs, chunksizes[idx], n_features, + dtype) for idx, Xs in enumerate(X_shuffled)] - y_dela = [da.from_delayed(dask.delayed(ys), - shape=(chunksizes[idx], n_targets), - meta=cp.zeros((1)), - dtype=dtype) + y_dela = [_dask_array_from_delayed(ys, chunksizes[idx], n_targets, + dtype) for idx, ys in enumerate(y_shuffled)] return da.concatenate(X_dela, axis=0), da.concatenate(y_dela, axis=0) def convert_C_to_F_order(client, X, chunksizes, n_features, dtype): - X_parts = client.sync(_extract_partitions, X) + X_ddh = DistributedDataHandler.create(data=X, client=client) X_converted = [client.submit(cp.array, X_part, copy=False, order='F', workers=[w]) - for idx, (w, X_part) in enumerate(X_parts)] + for idx, (w, X_part) in enumerate(X_ddh.gpu_futures)] X_dela = [da.from_delayed(dask.delayed(Xc), shape=(chunksizes[idx], n_features), @@ -159,7 +160,7 @@ def convert_C_to_F_order(client, X, chunksizes, n_features, dtype): return da.concatenate(X_dela, axis=0) -def generate_chunks_for_qr(total_size, min_size, n_parts): +def _generate_chunks_for_qr(total_size, min_size, n_parts): n_total_per_part = max(1, int(total_size / n_parts)) if n_total_per_part > min_size: @@ -172,10 +173,10 @@ def 
generate_chunks_for_qr(total_size, min_size, n_parts): return tuple(chunks_list) -def generate_singular_values(n, effective_rank, tail_strength, - n_samples_per_part): +def _generate_singular_values(n, effective_rank, tail_strength, + n_samples_per_part, dtype='float32'): # Index of the singular values - sing_ind = cp.arange(n, dtype=cp.float64) + sing_ind = cp.arange(n, dtype=dtype) # Build the singular profile by assembling signal and noise components tmp = sing_ind / effective_rank @@ -188,12 +189,23 @@ def generate_singular_values(n, effective_rank, tail_strength, def _make_low_rank_covariance(n_features, effective_rank, tail_strength, seed, n_parts, n_samples_per_part, dtype): + """ + This approach is a faster approach than making X as a full low + rank matrix. Here, we take advantage of the fact that with + SVD, X * X^T = V * S^2 * V^T. This means that we can + generate a covariance matrix by generating only the right + eigen-vector and the squared, low-rank singular values. + With a memory usage of only O(n_features ^ 2) in this case, we pass + this covariance matrix to workers to generate each part of X + embarassingly parallel from a multi-variate normal with mean 0 + and generated covariance. 
+ """ local_rs = cp.random.RandomState(seed=seed) m2 = local_rs.standard_normal((n_features, n_features), dtype=dtype) v, _ = cp.linalg.qr(m2) - s = generate_singular_values(n_features, effective_rank, tail_strength, - n_samples_per_part) + s = _generate_singular_values(n_features, effective_rank, tail_strength, + n_samples_per_part) v *= (s ** 2) return cp.dot(v, cp.transpose(v)) @@ -246,14 +258,14 @@ def make_low_rank_matrix(n_samples=100, n_features=100, # Random (ortho normal) vectors m1 = rs.standard_normal((n_samples, n), - chunks=(generate_chunks_for_qr(n_samples, - n, n_parts), -1), + chunks=(_generate_chunks_for_qr(n_samples, + n, n_parts), -1), dtype=dtype) u, _ = da.linalg.qr(m1) m2 = rs.standard_normal((n, n_features), - chunks=(-1, generate_chunks_for_qr(n_features, - n, n_parts)), + chunks=(-1, _generate_chunks_for_qr(n_features, + n, n_parts)), dtype=dtype) v, _ = da.linalg.qr(m2) @@ -263,8 +275,8 @@ def make_low_rank_matrix(n_samples=100, n_features=100, u = u.rechunk({0: n_samples_per_part, 1: -1}) v = v.rechunk({0: n_samples_per_part, 1: -1}) - local_s = generate_singular_values(n, effective_rank, tail_strength, - n_samples_per_part) + local_s = _generate_singular_values(n, effective_rank, tail_strength, + n_samples_per_part) s = da.from_array(local_s, chunks=(int(n_samples_per_part),)) @@ -332,7 +344,9 @@ def make_regression(n_samples=100, n_features=100, n_informative=10, dtype of generated data use_full_low_rank : boolean (default=True) Whether to use the entire dataset to generate the low rank matrix. - If False, it uses the first chunk + If False, it creates a low rank covariance and uses the + corresponding covariance to generate a multivariate normal + distribution on the remaining chunks Returns ------- @@ -356,9 +370,8 @@ def make_regression(n_samples=100, n_features=100, n_informative=10, 3. 
When `shuffle = True` and `order = F`, there are memory spikes to shuffle the `F` order arrays - NOTE: If one runs into Out-Of-Memory errors when any of the above - known-limitations are breached, try increasing the `n_parts` - parameter. + NOTE: If out-of-memory errors are encountered in any of the above + configurations, try increasing the `n_parts` parameter. """ client = default_client() if client is None else client @@ -401,9 +414,10 @@ def make_regression(n_samples=100, n_features=100, n_informative=10, X = X.rechunk({0: data_chunksizes, 1: -1}) else: - seed = rs.randint(n_samples) - covar = make_low_rank_covariance(n_features, effective_rank, - tail_strength, seed, n_parts, + seed = int(rs.randint(n_samples).compute()) + covar = make_low_rank_covariance(client, n_features, + effective_rank, tail_strength, + seed, n_parts, n_samples_per_part, dtype) X = data_from_multivariate_normal(client, rs, covar, data_chunksizes, n_features, diff --git a/python/cuml/test/dask/test_datasets.py b/python/cuml/test/dask/test_datasets.py index e4d238f8c3..8c8c7f09f3 100644 --- a/python/cuml/test/dask/test_datasets.py +++ b/python/cuml/test/dask/test_datasets.py @@ -25,7 +25,7 @@ from cuml.dask.datasets import make_blobs from cuml.test.utils import unit_param, quality_param, stress_param -from cuml.dask.common.part_utils import _extract_partitions +from cuml.dask.common.input_utils import DistributedDataHandler @pytest.mark.parametrize('nrows', [unit_param(1e3), quality_param(1e5), @@ -155,15 +155,14 @@ def test_make_regression(n_samples, n_features, n_informative, assert test2, "Unexpectedly incongruent outputs" - X_part = c.sync(_extract_partitions, out) - out_part = X_part[0][1].result() - - y_part = c.sync(_extract_partitions, values) - value_part = y_part[0][1].result() + data_ddh = DistributedDataHandler.create(data=(out, values), + client=c) + out_part, value_part = data_ddh.gpu_futures[0][1].result() if coef: - coefs_part = c.sync(_extract_partitions, coefs) - 
coefs_part = coefs_part[0][1].result() + coefs_ddh = DistributedDataHandler.create(data=coefs, + client=c) + coefs_part = coefs_ddh.gpu_futures[0][1].result() if order == 'F': assert out_part.flags['F_CONTIGUOUS'] if n_targets > 1: From 21c4657148a23e9da0d64603adab3ab545ee9640 Mon Sep 17 00:00:00 2001 From: divyegala Date: Wed, 22 Apr 2020 16:53:14 -0500 Subject: [PATCH 290/330] adding sklearn test --- python/cuml/datasets/classification.py | 10 +-- python/cuml/test/test_make_classification.py | 68 ++++++++++++++++++++ 2 files changed, 74 insertions(+), 4 deletions(-) diff --git a/python/cuml/datasets/classification.py b/python/cuml/datasets/classification.py index dd14d267e7..31416345ce 100644 --- a/python/cuml/datasets/classification.py +++ b/python/cuml/datasets/classification.py @@ -281,8 +281,9 @@ def make_classification(n_samples=100, n_features=20, n_informative=2, # NOTE: This could be done outside the loop, but a current # cupy bug does not allow that # https://github.com/cupy/cupy/issues/3284 - X[centroid_indices[0], n_informative:n_informative - + n_redundant] = cp.dot(X_k, B) + if n_redundant > 0: + X[centroid_indices[0], n_informative:n_informative + + n_redundant] = cp.dot(X_k, B) X_k += centroid # shift the cluster to a vertex X[centroid_indices[0], :n_informative] = X_k @@ -300,8 +301,9 @@ def make_classification(n_samples=100, n_features=20, n_informative=2, A = _informative_covariance[k] X_k = cp.dot(X_k, A) # introduce random covariance - X[start:stop, n_informative:n_informative + n_redundant] = \ - cp.dot(X_k, B) + if n_redundant > 0: + X[start:stop, n_informative:n_informative + n_redundant] = \ + cp.dot(X_k, B) X_k += centroid # shift the cluster to a vertex X[start:stop, :n_informative] = X_k diff --git a/python/cuml/test/test_make_classification.py b/python/cuml/test/test_make_classification.py index 7aed477f93..c704a41c1a 100644 --- a/python/cuml/test/test_make_classification.py +++ b/python/cuml/test/test_make_classification.py @@ -15,7 
+15,12 @@ # import pytest +from functools import partial +import numpy as np +import cupy as cp + from cuml.datasets.classification import make_classification +from cuml.test.utils import array_equal @pytest.mark.parametrize('n_samples', [500, 1000]) @@ -44,3 +49,66 @@ def test_make_classification(n_samples, n_features, hypercube, n_classes, assert X.flags['F_CONTIGUOUS'] elif order == 'C': assert X.flags['C_CONTIGUOUS'] + + +def test_make_classification_informative_features(): + """Test the construction of informative features in make_classification + Also tests `n_clusters_per_class`, `n_classes`, `hypercube` and + fully-specified `weights`. + """ + # Create very separate clusters; check that vertices are unique and + # correspond to classes + class_sep = 1e6 + make = partial(make_classification, class_sep=class_sep, n_redundant=0, + n_repeated=0, flip_y=0, shift=0, scale=1, shuffle=False) + + for n_informative, weights, n_clusters_per_class in [(2, [1], 1), + (2, [1/3] * 3, 1), + (2, [1/4] * 4, 1), + (2, [1/2] * 2, 2), + (2, [3/4, 1/4], 2), + (10, [1/3] * 3, 10), + (np.int(64), [1], 1) + ]: + n_classes = len(weights) + n_clusters = n_classes * n_clusters_per_class + n_samples = n_clusters * 50 + + for hypercube in (False, True): + X, y = make(n_samples=n_samples, n_classes=n_classes, + weights=weights, n_features=n_informative, + n_informative=n_informative, + n_clusters_per_class=n_clusters_per_class, + hypercube=hypercube, random_state=0) + + assert X.shape == (n_samples, n_informative) + assert y.shape == (n_samples,) + + # Cluster by sign, viewed as strings to allow uniquing + signs = np.sign(cp.asnumpy(X)) + signs = signs.view(dtype='|S{0}'.format(signs.strides[0])) + unique_signs, cluster_index = np.unique(signs, + return_inverse=True) + + assert len(unique_signs) == n_clusters, ( + "Wrong number of clusters, or not in distinct quadrants") + + # Ensure on vertices of hypercube + for cluster in range(len(unique_signs)): + centroid = X[cluster_index == 
cluster].mean(axis=0) + if hypercube: + assert array_equal(cp.abs(centroid) / class_sep, + cp.ones(n_informative), + 1e-5) + else: + with pytest.raises(AssertionError): + assert array_equal(cp.abs(centroid) / class_sep, + cp.ones(n_informative), + 1e-5) + + with pytest.raises(ValueError): + make(n_features=2, n_informative=2, n_classes=5, + n_clusters_per_class=1) + with pytest.raises(ValueError): + make(n_features=2, n_informative=2, n_classes=3, + n_clusters_per_class=2) From 8a67fd423bc2b20ab5a8de991017f20442f2d6d5 Mon Sep 17 00:00:00 2001 From: Simon Andersen Date: Wed, 22 Apr 2020 18:05:36 -0500 Subject: [PATCH 291/330] Explicitly use .iloc instead of [] --- python/cuml/preprocessing/encoders.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cuml/preprocessing/encoders.py b/python/cuml/preprocessing/encoders.py index bc5937848c..ce1e20992c 100644 --- a/python/cuml/preprocessing/encoders.py +++ b/python/cuml/preprocessing/encoders.py @@ -329,7 +329,7 @@ def inverse_transform(self, X): enc_size = len(cats) x_feature = X[:, j:j + enc_size] idx = cp.argmax(x_feature, axis=1) - inv = Series(cats[idx]).reset_index(drop=True) + inv = Series(cats.iloc[idx]).reset_index(drop=True) if self.handle_unknown == 'ignore': not_null_idx = x_feature.any(axis=1) From 3027f3e6939359a38afb693391cc6129237a09ac Mon Sep 17 00:00:00 2001 From: Simon Andersen Date: Wed, 22 Apr 2020 18:07:32 -0500 Subject: [PATCH 292/330] Update tests with explicit categories='auto' This is necessary to comply with sklearn 0.22 --- python/cuml/test/test_one_hot_encoder.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/python/cuml/test/test_one_hot_encoder.py b/python/cuml/test/test_one_hot_encoder.py index 8f96cf8bad..03098ea3d2 100644 --- a/python/cuml/test/test_one_hot_encoder.py +++ b/python/cuml/test/test_one_hot_encoder.py @@ -184,8 +184,8 @@ def test_onehot_random_inputs(drop, sparse, n_samples, as_array): X, ary = 
_generate_inputs_from_categories(n_samples=n_samples, as_array=as_array) - enc = OneHotEncoder(sparse=sparse, drop=drop) - sk_enc = SkOneHotEncoder(sparse=sparse, drop=drop) + enc = OneHotEncoder(sparse=sparse, drop=drop, categories='auto') + sk_enc = SkOneHotEncoder(sparse=sparse, drop=drop, categories='auto') ohe = enc.fit_transform(X) ref = sk_enc.fit_transform(ary) if sparse: @@ -205,8 +205,8 @@ def test_onehot_drop_idx_first(as_array): X = _from_df_to_cupy(X) X_ary = cp.asnumpy(X) - enc = OneHotEncoder(sparse=False, drop='first') - sk_enc = SkOneHotEncoder(sparse=False, drop='first') + enc = OneHotEncoder(sparse=False, drop='first', categories='auto') + sk_enc = SkOneHotEncoder(sparse=False, drop='first', categories='auto') ohe = enc.fit_transform(X) ref = sk_enc.fit_transform(X_ary) cp.testing.assert_array_equal(ohe, ref) @@ -225,10 +225,11 @@ def test_onehot_drop_one_of_each(as_array): X_ary = cp.asnumpy(X) drop = drop_ary = _convert_drop(drop) - enc = OneHotEncoder(sparse=False, drop=drop) + enc = OneHotEncoder(sparse=False, drop=drop, categories='auto') ohe = enc.fit_transform(X) print(ohe.dtype) - ref = SkOneHotEncoder(sparse=False, drop=drop_ary).fit_transform(X_ary) + ref = SkOneHotEncoder(sparse=False, drop=drop_ary, + categories='auto').fit_transform(X_ary) cp.testing.assert_array_equal(ohe, ref) inv = enc.inverse_transform(ohe) assert_inverse_equal(inv, X) @@ -266,7 +267,7 @@ def test_onehot_get_categories(as_array): cats = enc.categories_ for i in range(len(ref)): - cp.testing.assert_array_equal(ref[i], cats[i]) + np.testing.assert_array_equal(ref[i], cats[i].to_array()) @pytest.mark.parametrize('as_array', [True, False], ids=['cupy', 'cudf']) @@ -281,8 +282,8 @@ def test_onehot_sparse_drop(as_array): ary = cp.asnumpy(X) drop = drop_ary = _convert_drop(drop) - enc = OneHotEncoder(sparse=True, drop=drop) - sk_enc = SkOneHotEncoder(sparse=True, drop=drop_ary) + enc = OneHotEncoder(sparse=True, drop=drop, categories='auto') + sk_enc = 
SkOneHotEncoder(sparse=True, drop=drop_ary, categories='auto') ohe = enc.fit_transform(X) ref = sk_enc.fit_transform(ary) cp.testing.assert_array_equal(ohe.toarray(), ref.toarray()) From fa8f0b091ca76e4b3045932eaa2f7a9de84bb689 Mon Sep 17 00:00:00 2001 From: salonijain27 Date: Wed, 22 Apr 2020 23:22:04 -0500 Subject: [PATCH 293/330] update test to run on multiple workers and for multiple partitions --- python/cuml/test/dask/test_random_forest.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/cuml/test/dask/test_random_forest.py b/python/cuml/test/dask/test_random_forest.py index 211a107571..649c12b987 100644 --- a/python/cuml/test/dask/test_random_forest.py +++ b/python/cuml/test/dask/test_random_forest.py @@ -348,13 +348,13 @@ def test_rf_regression_dask_cpu(partitions_per_worker, cluster): c.close() -@pytest.mark.parametrize('partitions_per_worker', [1]) +@pytest.mark.parametrize('partitions_per_worker', [5]) @pytest.mark.parametrize('output_class', [True, False]) def test_rf_classification_dask_fil_predict_proba(partitions_per_worker, cluster, output_class): - c = Client(threads_per_worker=1, n_workers=1) + c = Client() try: @@ -397,4 +397,4 @@ def test_rf_classification_dask_fil_predict_proba(partitions_per_worker, assert fil_mse <= sk_mse + 0.003 finally: - c.close() + c.close() From 230cba6c50df8e3752814244df4095c90a7abfcb Mon Sep 17 00:00:00 2001 From: salonijain27 Date: Wed, 22 Apr 2020 23:42:20 -0500 Subject: [PATCH 294/330] update style checks --- python/cuml/test/dask/test_random_forest.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/python/cuml/test/dask/test_random_forest.py b/python/cuml/test/dask/test_random_forest.py index 649c12b987..65a591e647 100644 --- a/python/cuml/test/dask/test_random_forest.py +++ b/python/cuml/test/dask/test_random_forest.py @@ -349,12 +349,10 @@ def test_rf_regression_dask_cpu(partitions_per_worker, cluster): 
@pytest.mark.parametrize('partitions_per_worker', [5]) -@pytest.mark.parametrize('output_class', [True, False]) def test_rf_classification_dask_fil_predict_proba(partitions_per_worker, - cluster, - output_class): + cluster): - c = Client() + c = Client(cluster) try: @@ -366,7 +364,7 @@ def test_rf_classification_dask_fil_predict_proba(partitions_per_worker, y = y.astype(np.int32) X_train, X_test, y_train, y_test = \ - train_test_split(X, y, test_size=100) + train_test_split(X, y, test_size=100, random_state=123) cu_rf_params = {'n_bins': 16, 'n_streams': 1, 'n_estimators': 40, 'max_depth': 16 @@ -393,8 +391,8 @@ def test_rf_classification_dask_fil_predict_proba(partitions_per_worker, sk_mse = mean_squared_error(y_proba, sk_preds_proba) # The threshold is required as the test would intermitently - # fail with a max difference of 0.003 between the two mse values - assert fil_mse <= sk_mse + 0.003 + # fail with a max difference of 0.022 between the two mse values + assert fil_mse <= sk_mse + 0.022 finally: - c.close() + c.close() From bdd358e74d5ca80fbd2e7baf847ed8a094add8a9 Mon Sep 17 00:00:00 2001 From: wxbn Date: Thu, 23 Apr 2020 10:45:14 +0000 Subject: [PATCH 295/330] Requested changes --- python/cuml/test/test_random_forest.py | 257 ++++++++++--------------- 1 file changed, 104 insertions(+), 153 deletions(-) diff --git a/python/cuml/test/test_random_forest.py b/python/cuml/test/test_random_forest.py index e0d8dc1ca6..51c02ae938 100644 --- a/python/cuml/test/test_random_forest.py +++ b/python/cuml/test/test_random_forest.py @@ -36,40 +36,85 @@ @pytest.fixture( + scope="session", params=[ - unit_param({'n_samples': 500, 'n_features': 20, 'n_informative': 10}), + unit_param({'n_samples': 350, 'n_features': 20, 'n_informative': 10}), quality_param({'n_samples': 5000, 'n_features': 200, - 'n_informative': 100}), - stress_param({'n_samples': 500000, 'n_features': 500, - 'n_informative': 350}) + 'n_informative': 80}), + stress_param({'n_samples': 500000, 'n_features': 
400, + 'n_informative': 180}) ]) -def dataset(request): - n_samples = request.param['n_samples'] - n_features = request.param['n_features'] - n_informative = request.param['n_informative'] - X, y = make_classification(n_samples=n_samples, n_features=n_features, +def small_clf(request): + X, y = make_classification(n_samples=request.param['n_samples'], + n_features=request.param['n_features'], n_clusters_per_class=1, - n_informative=n_informative, + n_informative=request.param['n_informative'], random_state=123, n_classes=2) return X, y @pytest.fixture( + scope="session", params=[ - unit_param({'n_samples': 1500, 'n_features': 80, 'n_informative': 40}), + unit_param({'n_samples': 500, 'n_features': 20, 'n_informative': 10}), quality_param({'n_samples': 5000, 'n_features': 200, + 'n_informative': 50}), + stress_param({'n_samples': 500000, 'n_features': 400, + 'n_informative': 100}) + ]) +def large_clf(request): + X, y = make_classification(n_samples=request.param['n_samples'], + n_features=request.param['n_features'], + n_clusters_per_class=1, + n_informative=request.param['n_informative'], + random_state=123, n_classes=2) + return X, y + + +@pytest.fixture( + scope="session", + params=[ + unit_param({'n_samples': 1500, 'n_features': 20, 'n_informative': 10}), + quality_param({'n_samples': 12000, 'n_features': 200, 'n_informative': 100}), stress_param({'n_samples': 500000, 'n_features': 500, 'n_informative': 350}) ]) -def large_dataset(request): - n_samples = request.param['n_samples'] - n_features = request.param['n_features'] - n_informative = request.param['n_informative'] - X, y = make_classification(n_samples=n_samples, n_features=n_features, - n_clusters_per_class=1, - n_informative=n_informative, - random_state=123, n_classes=2) +def large_reg(request): + X, y = make_regression(n_samples=request.param['n_samples'], + n_features=request.param['n_features'], + n_informative=request.param['n_informative'], + random_state=123) + return X, y + + +special_reg_params 
= [ + unit_param({'mode': 'unit', 'n_samples': 500, + 'n_features': 20, 'n_informative': 10}), + quality_param({'mode': 'quality', 'n_samples': 500, + 'n_features': 20, 'n_informative': 10}), + quality_param({'mode': 'quality', 'n_features': 200, + 'n_informative': 50}), + stress_param({'mode': 'stress', 'n_samples': 500, + 'n_features': 20, 'n_informative': 10}), + stress_param({'mode': 'stress', 'n_features': 200, + 'n_informative': 50}), + stress_param({'mode': 'stress', 'n_samples': 1000, + 'n_features': 400, 'n_informative': 100}) + ] + + +@pytest.fixture( + scope="session", + params=special_reg_params) +def special_reg(request): + if request.param['mode'] == 'quality': + X, y = fetch_california_housing(return_X_y=True) + else: + X, y = make_regression(n_samples=request.param['n_samples'], + n_features=request.param['n_features'], + n_informative=request.param['n_informative'], + random_state=123) return X, y @@ -78,11 +123,11 @@ def large_dataset(request): @pytest.mark.parametrize('datatype', [np.float32]) @pytest.mark.parametrize('split_algo', [0, 1]) @pytest.mark.parametrize('max_features', [1.0, 'auto', 'log2', 'sqrt']) -def test_rf_classification(dataset, datatype, split_algo, +def test_rf_classification(small_clf, datatype, split_algo, rows_sample, max_features): use_handle = True - X, y = dataset + X, y = small_clf X = X.astype(datatype) y = y.astype(np.int32) X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8, @@ -119,33 +164,17 @@ def test_rf_classification(dataset, datatype, split_algo, assert fil_acc >= (cuml_acc - 0.02) -@pytest.mark.parametrize('mode', [unit_param('unit'), quality_param('quality'), - stress_param('stress')]) -@pytest.mark.parametrize('column_info', [unit_param([20, 10]), - quality_param([200, 50]), - stress_param([400, 100])]) @pytest.mark.parametrize('rows_sample', [unit_param(1.0), quality_param(0.90), stress_param(0.95)]) @pytest.mark.parametrize('datatype', [np.float32]) 
@pytest.mark.parametrize('split_algo', [0, 1]) @pytest.mark.parametrize('max_features', [1.0, 'auto', 'log2', 'sqrt']) -def test_rf_regression(datatype, split_algo, mode, column_info, - max_features, rows_sample): +def test_rf_regression(special_reg, datatype, split_algo, max_features, + rows_sample): - ncols, n_info = column_info use_handle = True - if mode == 'unit': - X, y = make_regression(n_samples=500, n_features=ncols, - n_informative=n_info, - random_state=123) - elif mode == 'quality': - X, y = fetch_california_housing(return_X_y=True) - - else: - X, y = make_regression(n_samples=100000, n_features=ncols, - n_informative=n_info, - random_state=123) + X, y = special_reg X = X.astype(datatype) y = y.astype(datatype) X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8, @@ -170,7 +199,7 @@ def test_rf_regression(datatype, split_algo, mode, column_info, fil_r2 = r2_score(y_test, fil_preds, convert_dtype=datatype) # Initialize, fit and predict using # sklearn's random forest regression model - if mode != "stress": + if X.shape[0] < 1000: # mode != "stress" sk_model = skrfr(n_estimators=50, max_depth=16, min_samples_split=2, max_features=max_features, random_state=10) @@ -182,78 +211,14 @@ def test_rf_regression(datatype, split_algo, mode, column_info, @pytest.mark.parametrize('datatype', [np.float32]) -def test_rf_regression_default(large_dataset, datatype): - - X, y = large_dataset - X = X.astype(datatype) - y = y.astype(datatype) - X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8, - random_state=0) - - # Initialize, fit and predict using cuML's - # random forest classification model - cuml_model = curfr() - cuml_model.fit(X_train, y_train) - - cu_preds = cuml_model.predict(X_test, predict_model="CPU") - cu_r2 = r2_score(y_test, cu_preds, convert_dtype=datatype) - - # predict using FIL - fil_preds = cuml_model.predict(X_test, predict_model="GPU") - fil_preds = np.reshape(fil_preds, np.shape(cu_preds)) - - fil_r2 = 
r2_score(y_test, fil_preds, convert_dtype=datatype) - - # score function should be equivalent - score_mse = cuml_model.score(X_test, y_test, predict_model="GPU") - sk_mse = mean_squared_error(y_test, fil_preds) - assert sk_mse == pytest.approx(score_mse) - - # Initialize, fit and predict using - # sklearn's random forest regression model - if X.shape[0] < 500000: - sk_model = skrfr(max_depth=16, random_state=10) - sk_model.fit(X_train, y_train) - sk_preds = sk_model.predict(X_test) - sk_r2 = r2_score(y_test, sk_preds, convert_dtype=datatype) - # XXX Accuracy gap exists with default parameters, requires - # further investigation for next release - assert fil_r2 >= (sk_r2 - 0.08) - - assert fil_r2 >= (cu_r2 - 0.02) - - -@pytest.mark.parametrize('datatype', [np.float32]) -def test_rf_classification_seed(dataset, datatype): +def test_rf_classification_seed(small_clf, datatype): - X, y = dataset + X, y = small_clf X = X.astype(datatype) y = y.astype(np.int32) X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8, random_state=0) - cuml_model = curfc() - cuml_model.fit(X_train, y_train) - fil_preds = cuml_model.predict(X_test, predict_model="GPU") - cu_preds = cuml_model.predict(X_test, predict_model="CPU") - fil_preds = np.reshape(fil_preds, np.shape(cu_preds)) - - fil_acc = accuracy_score(y_test, fil_preds) - cu_acc = accuracy_score(y_test, cu_preds) - - score_acc = cuml_model.score(X_test, y_test) - assert cu_acc == pytest.approx(score_acc) - - # sklearn random forest classification model - # initialization, fit and predict - if X.shape[0] < 500000: - sk_model = skrfc(max_depth=16, random_state=10) - sk_model.fit(X_train, y_train) - sk_preds = sk_model.predict(X_test) - sk_acc = accuracy_score(y_test, sk_preds) - assert fil_acc >= (sk_acc - 0.07) - assert fil_acc >= (cu_acc - 0.02) - for i in range(8): seed = random.randint(100, 1e5) # Initialize, fit and predict using cuML's @@ -294,9 +259,9 @@ def test_rf_classification_seed(dataset, datatype): 
@pytest.mark.parametrize('datatype', [(np.float64, np.float32), (np.float32, np.float64)]) @pytest.mark.parametrize('convert_dtype', [True, False]) -def test_rf_classification_float64(dataset, datatype, convert_dtype): +def test_rf_classification_float64(small_clf, datatype, convert_dtype): - X, y = dataset + X, y = small_clf X = X.astype(datatype[0]) y = y.astype(np.int32) X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8, @@ -335,9 +300,9 @@ def test_rf_classification_float64(dataset, datatype, convert_dtype): @pytest.mark.parametrize('datatype', [(np.float64, np.float32), (np.float32, np.float64)]) -def test_rf_regression_float64(large_dataset, datatype): +def test_rf_regression_float64(large_reg, datatype): - X, y = large_dataset + X, y = large_reg X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8, random_state=0) X_train = X_train.astype(datatype[0]) @@ -427,11 +392,12 @@ def test_rf_classification_multi_class(datatype, column_info, nrows, 'auto', False]) @pytest.mark.parametrize('algo', ['auto', 'naive', 'tree_reorg', 'batch_tree_reorg']) -def test_rf_classification_sparse(dataset, datatype, fil_sparse_format, algo): +def test_rf_classification_sparse(small_clf, datatype, + fil_sparse_format, algo): use_handle = True num_treees = 50 - X, y = dataset + X, y = small_clf X = X.astype(datatype) y = y.astype(np.int32) X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8, @@ -446,8 +412,6 @@ def test_rf_classification_sparse(dataset, datatype, fil_sparse_format, algo): n_estimators=num_treees, handle=handle, max_leaves=-1, max_depth=40) cuml_model.fit(X_train, y_train) - cu_preds = cuml_model.predict(X_test, predict_model="CPU") - cuml_acc = accuracy_score(y_test, cu_preds) if ((not fil_sparse_format or algo == 'tree_reorg' or algo == 'batch_tree_reorg') or @@ -466,7 +430,7 @@ def test_rf_classification_sparse(dataset, datatype, fil_sparse_format, algo): threshold=0.5, 
fil_sparse_format=fil_sparse_format, algo=algo) - fil_preds = np.reshape(fil_preds, np.shape(cu_preds)) + fil_preds = np.reshape(fil_preds, np.shape(y_test)) fil_acc = accuracy_score(y_test, fil_preds) fil_model = cuml_model.convert_to_fil_model() @@ -491,36 +455,17 @@ def test_rf_classification_sparse(dataset, datatype, fil_sparse_format, algo): sk_acc = accuracy_score(y_test, sk_preds) assert fil_acc >= (sk_acc - 0.07) - assert fil_acc >= (cuml_acc - 0.02) - -@pytest.mark.parametrize('mode', [unit_param('unit'), quality_param('quality'), - stress_param('stress')]) -@pytest.mark.parametrize('column_info', [unit_param([20, 10]), - quality_param([200, 50]), - stress_param([400, 100])]) @pytest.mark.parametrize('datatype', [np.float32]) @pytest.mark.parametrize('fil_sparse_format', ['not_supported', True, 'auto', False]) @pytest.mark.parametrize('algo', ['auto', 'naive', 'tree_reorg', 'batch_tree_reorg']) -def test_rf_regression_sparse(datatype, mode, column_info, - fil_sparse_format, algo): - ncols, n_info = column_info +def test_rf_regression_sparse(special_reg, datatype, fil_sparse_format, algo): use_handle = True num_treees = 50 - if mode == 'unit': - X, y = make_regression(n_samples=500, n_features=ncols, - n_informative=n_info, - random_state=123) - elif mode == 'quality': - X, y = fetch_california_housing(return_X_y=True) - - else: - X, y = make_regression(n_samples=1000, n_features=ncols, - n_informative=n_info, - random_state=123) + X, y = special_reg X = X.astype(datatype) y = y.astype(datatype) X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8, @@ -535,8 +480,7 @@ def test_rf_regression_sparse(datatype, mode, column_info, n_estimators=num_treees, handle=handle, max_leaves=-1, max_depth=40, accuracy_metric='mse') cuml_model.fit(X_train, y_train) - cu_preds = cuml_model.predict(X_test, predict_model="CPU") - cu_r2 = r2_score(y_test, cu_preds, convert_dtype=datatype) + # predict using FIL if ((not fil_sparse_format or algo == 
'tree_reorg' or algo == 'batch_tree_reorg') or @@ -549,7 +493,7 @@ def test_rf_regression_sparse(datatype, mode, column_info, fil_preds = cuml_model.predict(X_test, predict_model="GPU", fil_sparse_format=fil_sparse_format, algo=algo) - fil_preds = np.reshape(fil_preds, np.shape(cu_preds)) + fil_preds = np.reshape(fil_preds, np.shape(y_test)) fil_r2 = r2_score(y_test, fil_preds, convert_dtype=datatype) fil_model = cuml_model.convert_to_fil_model() @@ -557,19 +501,19 @@ def test_rf_regression_sparse(datatype, mode, column_info, input_type = 'numpy' fil_model_preds = fil_model.predict(X_test, output_type=input_type) - fil_model_preds = np.reshape(fil_model_preds, np.shape(cu_preds)) + fil_model_preds = np.reshape(fil_model_preds, np.shape(y_test)) fil_model_r2 = r2_score(y_test, fil_model_preds, convert_dtype=datatype) assert fil_r2 == fil_model_r2 tl_model = cuml_model.convert_to_treelite_model() assert num_treees == tl_model.num_trees - assert ncols == tl_model.num_features + assert X.shape[1] == tl_model.num_features del tl_model # Initialize, fit and predict using # sklearn's random forest regression model - if mode != "stress": + if X.shape[0] < 1000: # mode != "stress": sk_model = skrfr(n_estimators=50, max_depth=40, min_samples_split=2, random_state=10) @@ -577,18 +521,17 @@ def test_rf_regression_sparse(datatype, mode, column_info, sk_preds = sk_model.predict(X_test) sk_r2 = r2_score(y_test, sk_preds, convert_dtype=datatype) assert fil_r2 >= (sk_r2 - 0.07) - assert fil_r2 >= (cu_r2 - 0.02) @pytest.mark.memleak @pytest.mark.parametrize('fil_sparse_format', [True, False, 'auto']) @pytest.mark.parametrize('n_iter', [unit_param(5), quality_param(30), stress_param(80)]) -def test_rf_memory_leakage(dataset, fil_sparse_format, n_iter): +def test_rf_memory_leakage(small_clf, fil_sparse_format, n_iter): datatype = np.float32 use_handle = True - X, y = dataset + X, y = small_clf X = X.astype(datatype) y = y.astype(np.int32) X_train, X_test, y_train, y_test = 
train_test_split(X, y, train_size=0.8, @@ -648,10 +591,10 @@ def test_create_classification_model(max_features, @pytest.mark.parametrize('n_estimators', [10, 20, 100]) @pytest.mark.parametrize('n_bins', [8, 9, 10]) -def test_multiple_fits_classification(large_dataset, n_estimators, n_bins): +def test_multiple_fits_classification(large_clf, n_estimators, n_bins): datatype = np.float32 - X, y = large_dataset + X, y = large_clf X = X.astype(datatype) y = y.astype(np.int32) cuml_model = curfc(n_bins=n_bins, @@ -669,12 +612,19 @@ def test_multiple_fits_classification(large_dataset, n_estimators, n_bins): assert params['n_bins'] == n_bins +@pytest.mark.parametrize('column_info', [unit_param([100, 50]), + quality_param([200, 100]), + stress_param([500, 350])]) +@pytest.mark.parametrize('nrows', [unit_param(500), quality_param(5000), + stress_param(500000)]) @pytest.mark.parametrize('n_estimators', [10, 20, 100]) @pytest.mark.parametrize('n_bins', [8, 9, 10]) -def test_multiple_fits_regression(large_dataset, n_estimators, n_bins): +def test_multiple_fits_regression(column_info, nrows, n_estimators, n_bins): datatype = np.float32 - - X, y = large_dataset + ncols, n_info = column_info + X, y = make_regression(n_samples=nrows, n_features=ncols, + n_informative=n_info, + random_state=123) X = X.astype(datatype) y = y.astype(np.int32) cuml_model = curfr(n_bins=n_bins, @@ -698,10 +648,11 @@ def test_multiple_fits_regression(large_dataset, n_estimators, n_bins): stress_param(0.95)]) @pytest.mark.parametrize('datatype', [np.float32]) @pytest.mark.parametrize('max_features', [1.0, 'auto', 'log2', 'sqrt']) -def test_rf_classification_proba(dataset, datatype, rows_sample, max_features): +def test_rf_classification_proba(small_clf, datatype, + rows_sample, max_features): use_handle = True - X, y = dataset + X, y = small_clf X = X.astype(datatype) y = y.astype(np.int32) X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8, From 
573bd6a57a9958f0fcde47f9cb5e08e2cb2a27be Mon Sep 17 00:00:00 2001 From: salonijain27 Date: Thu, 23 Apr 2020 09:38:32 -0500 Subject: [PATCH 296/330] update predict-proba func --- python/cuml/dask/ensemble/randomforestclassifier.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cuml/dask/ensemble/randomforestclassifier.py b/python/cuml/dask/ensemble/randomforestclassifier.py index 46ab75281c..d2c1cd20b5 100755 --- a/python/cuml/dask/ensemble/randomforestclassifier.py +++ b/python/cuml/dask/ensemble/randomforestclassifier.py @@ -654,7 +654,7 @@ def predict_proba(self, X, output_class=True, algo='auto', Dask cuDF dataframe or CuPy backed Dask Array (n_rows, n_classes) """ self._concat_treelite_models() - data = DistributedDataHandler.single(X, client=self.client) + data = DistributedDataHandler.create(X, client=self.client) self.datatype = data.datatype kwargs = {"output_class": output_class, "convert_dtype": convert_dtype, From 5f94394d71003cb92100a0f140988a14b41bcbfe Mon Sep 17 00:00:00 2001 From: "Corey J. 
Nolet" Date: Thu, 23 Apr 2020 15:10:28 -0400 Subject: [PATCH 297/330] Updating style --- cpp/comms/std/src/ucp_helper.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/cpp/comms/std/src/ucp_helper.h b/cpp/comms/std/src/ucp_helper.h index bf650dae50..21a2893286 100644 --- a/cpp/comms/std/src/ucp_helper.h +++ b/cpp/comms/std/src/ucp_helper.h @@ -135,14 +135,12 @@ class comms_ucp_handler { } void load_free_req_func() { - req_free_func = - (dlsym_rec_free)dlsym(ucp_handle, "ucp_request_free"); + req_free_func = (dlsym_rec_free)dlsym(ucp_handle, "ucp_request_free"); assert_dlerror(); } void load_print_info_func() { - print_info_func = - (dlsym_print_info)dlsym(ucp_handle, "ucp_ep_print_info"); + print_info_func = (dlsym_print_info)dlsym(ucp_handle, "ucp_ep_print_info"); assert_dlerror(); } From 2f52668763de79b53ef6fe01acb08614f2ba4e73 Mon Sep 17 00:00:00 2001 From: divyegala Date: Thu, 23 Apr 2020 15:25:47 -0500 Subject: [PATCH 298/330] review changes --- python/cuml/test/dask/test_random_forest.py | 53 ++------------------- 1 file changed, 4 insertions(+), 49 deletions(-) diff --git a/python/cuml/test/dask/test_random_forest.py b/python/cuml/test/dask/test_random_forest.py index 63ff71addc..58230bf838 100644 --- a/python/cuml/test/dask/test_random_forest.py +++ b/python/cuml/test/dask/test_random_forest.py @@ -67,7 +67,7 @@ def _prep_training_data(c, X_train, y_train, partitions_per_worker): return X_train_df, y_train_df -@pytest.mark.parametrize('partitions_per_worker', [1, 3]) +@pytest.mark.parametrize('partitions_per_worker', [3]) def test_rf_classification_dask_cudf(partitions_per_worker, cluster): # Use CUDA_VISIBLE_DEVICES to control the number of workers @@ -107,7 +107,7 @@ def test_rf_classification_dask_cudf(partitions_per_worker, cluster): @pytest.mark.xfail(reason="Intermittent failure of test observed. 
For" "more information please check cuml issue #1934") -@pytest.mark.parametrize('partitions_per_worker', [1, 5]) +@pytest.mark.parametrize('partitions_per_worker', [5]) def test_rf_regression_dask_fil(partitions_per_worker, cluster): # Use CUDA_VISIBLE_DEVICES to control the number of workers @@ -161,52 +161,7 @@ def test_rf_regression_dask_fil(partitions_per_worker, cluster): c.close() -@pytest.mark.parametrize('partitions_per_worker', [1, 5]) -@pytest.mark.parametrize('output_class', [True, False]) -def test_rf_classification_dask_fil(partitions_per_worker, cluster, - output_class): - - # Use CUDA_VISIBLE_DEVICES to control the number of workers - c = Client(cluster) - - try: - - X, y = make_classification(n_samples=10000, n_features=30, - n_clusters_per_class=1, n_informative=20, - random_state=123, n_classes=2) - - X = X.astype(np.float32) - y = y.astype(np.int32) - - X_train, X_test, y_train, y_test = \ - train_test_split(X, y, test_size=1000) - - cu_rf_params = { - 'n_estimators': 25, - 'max_depth': 13, - 'n_bins': 15, - } - - X_train_df, y_train_df = _prep_training_data(c, X_train, y_train, - partitions_per_worker) - X_test_df, _ = _prep_training_data(c, X_test, y_test, - partitions_per_worker) - cu_rf_mg = cuRFC_mg(**cu_rf_params) - cu_rf_mg.fit(X_train_df, y_train_df) - cu_rf_mg_predict = cu_rf_mg.predict(X_test_df, output_class).compute() - cu_rf_mg_predict = cp.asnumpy(cp.array(cu_rf_mg_predict)) - if not output_class: - cu_rf_mg_predict = np.round(cu_rf_mg_predict) - - acc_score = accuracy_score(cu_rf_mg_predict, y_test, normalize=True) - - assert acc_score > 0.8 - - finally: - c.close() - - -@pytest.mark.parametrize('partitions_per_worker', [1, 5]) +@pytest.mark.parametrize('partitions_per_worker', [5]) @pytest.mark.parametrize('output_class', [True, False]) def test_rf_classification_dask_array(partitions_per_worker, cluster, output_class): @@ -250,7 +205,7 @@ def test_rf_classification_dask_array(partitions_per_worker, cluster, c.close() 
-@pytest.mark.parametrize('partitions_per_worker', [1, 5]) +@pytest.mark.parametrize('partitions_per_worker', [5]) def test_rf_regression_dask_cpu(partitions_per_worker, cluster): # Use CUDA_VISIBLE_DEVICES to control the number of workers From b3b390097ecf68e3e8ffebfe5bb27638d9e7d35c Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Thu, 23 Apr 2020 23:44:50 -0400 Subject: [PATCH 299/330] Apply suggestions from code review Co-Authored-By: Dante Gama Dessavre --- python/cuml/solvers/sgd.pyx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/cuml/solvers/sgd.pyx b/python/cuml/solvers/sgd.pyx index 8b4620feef..f8e5351ee6 100644 --- a/python/cuml/solvers/sgd.pyx +++ b/python/cuml/solvers/sgd.pyx @@ -453,7 +453,7 @@ class SGD(Base): del(X_m) - output_type = self._get_output_type(self.output_type) + output_type = self._get_output_type(X) return preds.to_output(output_type) @@ -514,6 +514,6 @@ class SGD(Base): del(X_m) - output_type = self._get_output_type(self.output_type) + output_type = self._get_output_type(X) return preds.to_output(output_type) From 0e28237963535dc49237816c5ab09e044f92d9dc Mon Sep 17 00:00:00 2001 From: Thejaswi Rao Date: Thu, 23 Apr 2020 21:20:47 -0700 Subject: [PATCH 300/330] FIX do not link cumlcomms and nccl when --singlepu option is specified --- python/setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/setup.py b/python/setup.py index ad614ece44..5808494833 100644 --- a/python/setup.py +++ b/python/setup.py @@ -121,8 +121,6 @@ # on libcumlprims libs = ['cuda', 'cuml++', - 'cumlcomms', - 'nccl', 'rmm'] include_dirs = ['../cpp/src', @@ -155,6 +153,8 @@ else: libs.append('cumlprims') + libs.append('cumlcomms') + libs.append('nccl') sys_include = os.path.dirname(sysconfig.get_path("include")) include_dirs.append("%s/cumlprims" % sys_include) From 283be31ad34bfbc1307f375ca022edae23e7b644 Mon Sep 17 00:00:00 2001 From: Thejaswi Rao Date: Thu, 23 Apr 2020 21:42:47 -0700 Subject: [PATCH 
301/330] BUG updated logger.trace to encode to utf-8 --- python/cuml/common/logger.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cuml/common/logger.pyx b/python/cuml/common/logger.pyx index 4c996f4e19..7bb76dff17 100644 --- a/python/cuml/common/logger.pyx +++ b/python/cuml/common/logger.pyx @@ -217,7 +217,7 @@ def trace(msg): msg : str Message to be logged. """ - cdef string s = msg + cdef string s = msg.encode("UTF-8") CUML_LOG_TRACE(s.c_str()) From 39967120556d4ef93962981878f5897e71bc6ffa Mon Sep 17 00:00:00 2001 From: divyegala Date: Fri, 24 Apr 2020 00:19:39 -0500 Subject: [PATCH 302/330] renaming internal functions --- python/cuml/dask/datasets/regression.py | 86 ++++++++++++------------- 1 file changed, 43 insertions(+), 43 deletions(-) diff --git a/python/cuml/dask/datasets/regression.py b/python/cuml/dask/datasets/regression.py index 1eada78cac..6ec0abd4a4 100644 --- a/python/cuml/dask/datasets/regression.py +++ b/python/cuml/dask/datasets/regression.py @@ -25,7 +25,7 @@ from cuml.dask.common.input_utils import DistributedDataHandler -def create_rs_generator(random_state): +def _create_rs_generator(random_state): if hasattr(random_state, '__module__'): rs_type = random_state.__module__ + '.' 
+ type(random_state).__name__ else: @@ -50,23 +50,23 @@ def _dask_array_from_delayed(part, nrows, ncols, dtype): meta=cp.zeros((1)), dtype=dtype) -def _f_order_standard_normal(nrows, ncols, dtype, seed): +def _dask_f_order_standard_normal(nrows, ncols, dtype, seed): local_rs = cp.random.RandomState(seed=seed) x = local_rs.standard_normal(nrows * ncols, dtype=dtype) x = x.reshape((nrows, ncols), order='F') return x -def f_order_standard_normal(client, rs, chunksizes, ncols, dtype): +def _f_order_standard_normal(client, rs, chunksizes, ncols, dtype): workers = list(client.has_what().keys()) n_chunks = len(chunksizes) chunks_workers = (workers * n_chunks)[:n_chunks] chunk_seeds = rs.permutation(len(chunksizes)) - chunks = [client.submit(_f_order_standard_normal, chunksize, ncols, dtype, - chunk_seeds[idx], workers=[chunks_workers[idx]], - pure=False) + chunks = [client.submit(_dask_f_order_standard_normal, chunksize, ncols, + dtype, chunk_seeds[idx], + workers=[chunks_workers[idx]], pure=False) for idx, chunksize in enumerate(chunksizes)] chunks_dela = [_dask_array_from_delayed(chunk, chunksizes[idx], ncols, @@ -76,14 +76,15 @@ def f_order_standard_normal(client, rs, chunksizes, ncols, dtype): return da.concatenate(chunks_dela, axis=0) -def _data_from_multivariate_normal(seed, covar, n_samples, n_features, dtype): +def _dask_data_from_multivariate_normal(seed, covar, n_samples, n_features, + dtype): mean = cp.zeros(n_features) local_rs = cp.random.RandomState() return local_rs.multivariate_normal(mean, covar, n_samples, dtype=dtype) -def data_from_multivariate_normal(client, rs, covar, chunksizes, n_features, +def _data_from_multivariate_normal(client, rs, covar, chunksizes, n_features, dtype): workers = list(client.has_what().keys()) @@ -92,7 +93,7 @@ def data_from_multivariate_normal(client, rs, covar, chunksizes, n_features, chunk_seeds = rs.permutation(len(chunksizes)) - data_parts = [client.submit(_data_from_multivariate_normal, + data_parts = 
[client.submit(_dask_data_from_multivariate_normal, chunk_seeds[idx], covar, chunksizes[idx], n_features, dtype, workers=[chunks_workers[idx]], @@ -106,7 +107,7 @@ def data_from_multivariate_normal(client, rs, covar, chunksizes, n_features, return da.concatenate(data_dela, axis=0) -def _f_order_shuffle(part, n_samples, seed, features_indices): +def _dask_f_order_shuffle(part, n_samples, seed, features_indices): X, y = part[0], part[1] local_rs = cp.random.RandomState(seed=seed) samples_indices = local_rs.permutation(n_samples) @@ -118,13 +119,13 @@ def _f_order_shuffle(part, n_samples, seed, features_indices): return X, y -def f_order_shuffle(client, rs, X, y, chunksizes, n_features, - features_indices, n_targets, dtype): +def _f_order_shuffle(client, rs, X, y, chunksizes, n_features, + features_indices, n_targets, dtype): data_ddh = DistributedDataHandler.create(data=(X, y), client=client) chunk_seeds = rs.permutation(len(chunksizes)) - shuffled = [client.submit(_f_order_shuffle, part, + shuffled = [client.submit(_dask_f_order_shuffle, part, chunksizes[idx], chunk_seeds[idx], features_indices, workers=[w], pure=False) @@ -146,17 +147,16 @@ def f_order_shuffle(client, rs, X, y, chunksizes, n_features, return da.concatenate(X_dela, axis=0), da.concatenate(y_dela, axis=0) -def convert_C_to_F_order(client, X, chunksizes, n_features, dtype): +def _convert_C_to_F_order(client, X, chunksizes, n_features, dtype): X_ddh = DistributedDataHandler.create(data=X, client=client) X_converted = [client.submit(cp.array, X_part, copy=False, order='F', workers=[w]) for idx, (w, X_part) in enumerate(X_ddh.gpu_futures)] - X_dela = [da.from_delayed(dask.delayed(Xc), - shape=(chunksizes[idx], n_features), - meta=cp.zeros((1)), - dtype=dtype) + X_dela = [_dask_array_from_delayed(Xc, chunksizes[idx], n_features, + dtype) for idx, Xc in enumerate(X_converted)] + return da.concatenate(X_dela, axis=0) @@ -186,9 +186,9 @@ def _generate_singular_values(n, effective_rank, tail_strength, return 
s -def _make_low_rank_covariance(n_features, effective_rank, - tail_strength, seed, n_parts, - n_samples_per_part, dtype): +def _dask_make_low_rank_covariance(n_features, effective_rank, + tail_strength, seed, n_parts, + n_samples_per_part, dtype): """ This approach is a faster approach than making X as a full low rank matrix. Here, we take advantage of the fact that with @@ -211,11 +211,11 @@ def _make_low_rank_covariance(n_features, effective_rank, return cp.dot(v, cp.transpose(v)) -def make_low_rank_covariance(client, n_features, effective_rank, +def _make_low_rank_covariance(client, n_features, effective_rank, tail_strength, seed, n_parts, n_samples_per_part, dtype): - return client.submit(_make_low_rank_covariance, n_features, + return client.submit(_dask_make_low_rank_covariance, n_features, effective_rank, tail_strength, seed, n_parts, n_samples_per_part, dtype) @@ -253,7 +253,7 @@ def make_low_rank_matrix(n_samples=100, n_features=100, The matrix. """ - rs = create_rs_generator(random_state) + rs = _create_rs_generator(random_state) n = min(n_samples, n_features) # Random (ortho normal) vectors @@ -377,7 +377,7 @@ def make_regression(n_samples=100, n_features=100, n_informative=10, client = default_client() if client is None else client n_informative = min(n_features, n_informative) - rs = create_rs_generator(random_state) + rs = _create_rs_generator(random_state) if n_samples_per_part is None: n_samples_per_part = max(1, int(n_samples / n_parts)) @@ -391,8 +391,8 @@ def make_regression(n_samples=100, n_features=100, n_informative=10, if effective_rank is None: # Randomly generate a well conditioned input set if order == 'F': - X = f_order_standard_normal(client, rs, data_chunksizes, - n_features, dtype) + X = _f_order_standard_normal(client, rs, data_chunksizes, + n_features, dtype) elif order == 'C': X = rs.standard_normal((n_samples, n_features), @@ -415,17 +415,17 @@ def make_regression(n_samples=100, n_features=100, n_informative=10, 1: -1}) else: seed 
= int(rs.randint(n_samples).compute()) - covar = make_low_rank_covariance(client, n_features, - effective_rank, tail_strength, - seed, n_parts, - n_samples_per_part, dtype) - X = data_from_multivariate_normal(client, rs, covar, - data_chunksizes, n_features, - dtype) + covar = _make_low_rank_covariance(client, n_features, + effective_rank, tail_strength, + seed, n_parts, + n_samples_per_part, dtype) + X = _data_from_multivariate_normal(client, rs, covar, + data_chunksizes, n_features, + dtype) if order == 'F': - X = convert_C_to_F_order(client, X, data_chunksizes, - n_features, dtype) + X = _convert_C_to_F_order(client, X, data_chunksizes, + n_features, dtype) # Generate a ground truth model with only n_informative features being non # zeros (the other features are not correlated to y and should be ignored @@ -452,9 +452,9 @@ def make_regression(n_samples=100, n_features=100, n_informative=10, if shuffle: features_indices = np.random.permutation(n_features) if order == 'F': - X, y = f_order_shuffle(client, rs, X, y, data_chunksizes, - n_features, features_indices, - n_targets, dtype) + X, y = _f_order_shuffle(client, rs, X, y, data_chunksizes, + n_features, features_indices, + n_targets, dtype) elif order == 'C': samples_indices = np.random.permutation(n_samples) @@ -472,11 +472,11 @@ def make_regression(n_samples=100, n_features=100, n_informative=10, y = da.squeeze(y) if order == 'F' and n_targets > 1: - y = convert_C_to_F_order(client, y, y.chunks[0], n_targets, dtype) + y = _convert_C_to_F_order(client, y, y.chunks[0], n_targets, dtype) if coef: - ground_truth = convert_C_to_F_order(client, ground_truth, - ground_truth.chunks[0], - n_targets, dtype) + ground_truth = _convert_C_to_F_order(client, ground_truth, + ground_truth.chunks[0], + n_targets, dtype) if coef: ground_truth = da.squeeze(ground_truth) From e6ceaa932bfbe1bc7bdafed4ab68d70cb5b45466 Mon Sep 17 00:00:00 2001 From: divyegala Date: Fri, 24 Apr 2020 00:26:18 -0500 Subject: [PATCH 303/330] style fixes 
--- python/cuml/dask/datasets/regression.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/cuml/dask/datasets/regression.py b/python/cuml/dask/datasets/regression.py index 6ec0abd4a4..61427c9249 100644 --- a/python/cuml/dask/datasets/regression.py +++ b/python/cuml/dask/datasets/regression.py @@ -85,7 +85,7 @@ def _dask_data_from_multivariate_normal(seed, covar, n_samples, n_features, def _data_from_multivariate_normal(client, rs, covar, chunksizes, n_features, - dtype): + dtype): workers = list(client.has_what().keys()) n_chunks = len(chunksizes) @@ -212,8 +212,8 @@ def _dask_make_low_rank_covariance(n_features, effective_rank, def _make_low_rank_covariance(client, n_features, effective_rank, - tail_strength, seed, n_parts, - n_samples_per_part, dtype): + tail_strength, seed, n_parts, + n_samples_per_part, dtype): return client.submit(_dask_make_low_rank_covariance, n_features, effective_rank, tail_strength, seed, From 97a843fd3febba416844301e0a9fd9985a680988 Mon Sep 17 00:00:00 2001 From: Thejaswi Rao Date: Thu, 23 Apr 2020 22:57:21 -0700 Subject: [PATCH 304/330] BUG updated utf-8 encode for all logger APIs --- python/cuml/common/logger.pyx | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/python/cuml/common/logger.pyx b/python/cuml/common/logger.pyx index 7bb76dff17..bc3df93808 100644 --- a/python/cuml/common/logger.pyx +++ b/python/cuml/common/logger.pyx @@ -171,7 +171,7 @@ def set_pattern(pattern): """ cdef string prev = Logger.get().getPattern() context_object = PatternSetter(prev.decode("UTF-8")) - cdef string s = pattern + cdef string s = pattern.encode("UTF-8") Logger.get().setPattern(s) return context_object @@ -238,7 +238,7 @@ def debug(msg): msg : str Message to be logged. """ - cdef string s = msg + cdef string s = msg.encode("UTF-8") CUML_LOG_DEBUG(s.c_str()) @@ -259,7 +259,7 @@ def info(msg): msg : str Message to be logged. 
""" - cdef string s = msg + cdef string s = msg.encode("UTF-8") CUML_LOG_INFO(s.c_str()) @@ -280,7 +280,7 @@ def warn(msg): msg : str Message to be logged. """ - cdef string s = msg + cdef string s = msg.encode("UTF-8") CUML_LOG_WARN(s.c_str()) @@ -301,7 +301,7 @@ def error(msg): msg : str Message to be logged. """ - cdef string s = msg + cdef string s = msg.encode("UTF-8") CUML_LOG_ERROR(s.c_str()) @@ -322,5 +322,5 @@ def critical(msg): msg : str Message to be logged. """ - cdef string s = msg + cdef string s = msg.encode("UTF-8") CUML_LOG_CRITICAL(s.c_str()) From 5d6c48d089277ec982d4e129240d87eb80a0816c Mon Sep 17 00:00:00 2001 From: Thejaswi Rao Date: Thu, 23 Apr 2020 23:01:21 -0700 Subject: [PATCH 305/330] BUG fixed log level accesses in logger unit-test --- python/cuml/test/test_logger.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/cuml/test/test_logger.py b/python/cuml/test/test_logger.py index 85cc16fd4a..903bb14d85 100644 --- a/python/cuml/test/test_logger.py +++ b/python/cuml/test/test_logger.py @@ -25,9 +25,9 @@ def test_logger(): logger.error("This is a error message") logger.critical("This is a critical message") - with logger.set_level(logger.LOG_LEVEL_WARN): - assert(logger.should_log_for(logger.LOG_LEVEL_WARN)) - assert(not logger.should_log_for(logger.LOG_LEVEL_INFO)) + with logger.set_level(logger.LEVEL_WARN): + assert(logger.should_log_for(logger.LEVEL_WARN)) + assert(not logger.should_log_for(logger.LEVEL_INFO)) with logger.set_pattern("%v"): logger.info("This is an info message") From bec1943972a38b54228b9b0ce28158896b3df098 Mon Sep 17 00:00:00 2001 From: Thejaswi Rao Date: Thu, 23 Apr 2020 23:05:00 -0700 Subject: [PATCH 306/330] BUG updated on more place needing utf-8 encoding --- python/cuml/common/logger.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cuml/common/logger.pyx b/python/cuml/common/logger.pyx index bc3df93808..07d1488786 100644 --- a/python/cuml/common/logger.pyx +++ 
b/python/cuml/common/logger.pyx @@ -134,7 +134,7 @@ class PatternSetter: pass def __exit__(self, a, b, c): - cdef string s = self.prev_pattern + cdef string s = self.prev_pattern.encode("utf-8") Logger.get().setPattern(s) From fcb2baeb9593b29f8a76c3bffdedb5b8b430dd95 Mon Sep 17 00:00:00 2001 From: Thejaswi Rao Date: Thu, 23 Apr 2020 23:07:13 -0700 Subject: [PATCH 307/330] BUG clang format fixes --- cpp/test/prims/contingencyMatrix.cu | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/cpp/test/prims/contingencyMatrix.cu b/cpp/test/prims/contingencyMatrix.cu index 01423bec39..4140c21dd0 100644 --- a/cpp/test/prims/contingencyMatrix.cu +++ b/cpp/test/prims/contingencyMatrix.cu @@ -125,9 +125,8 @@ class ContingencyMatrixTest MLCommon::Metrics::contingencyMatrix( dY, dYHat, numElements, dComputedOutput, stream, (void *)pWorkspace, workspaceSz, minLabel, maxLabel); - ASSERT_TRUE( - devArrMatch(dComputedOutput, dGoldenOutput, - numUniqueClasses * numUniqueClasses, Compare())); + ASSERT_TRUE(devArrMatch(dComputedOutput, dGoldenOutput, + numUniqueClasses * numUniqueClasses, Compare())); } ContingencyMatrixParam params; From 1a96c95a76d303313708da97163e9c0e528e7937 Mon Sep 17 00:00:00 2001 From: divyegala Date: Fri, 24 Apr 2020 11:57:29 -0500 Subject: [PATCH 308/330] style fixes --- python/cuml/test/dask/test_random_forest.py | 1 - 1 file changed, 1 deletion(-) diff --git a/python/cuml/test/dask/test_random_forest.py b/python/cuml/test/dask/test_random_forest.py index 790b4874d7..08d28b8d5d 100644 --- a/python/cuml/test/dask/test_random_forest.py +++ b/python/cuml/test/dask/test_random_forest.py @@ -37,7 +37,6 @@ import numpy as np import pandas as pd -import rmm from cuml.dask.ensemble import RandomForestClassifier as cuRFC_mg from cuml.dask.ensemble import RandomForestRegressor as cuRFR_mg From 5abe7681df221078c80fea5e84c197b981ed19a0 Mon Sep 17 00:00:00 2001 From: divyegala Date: Fri, 24 Apr 2020 16:49:29 -0500 Subject: [PATCH 309/330] silly typo --- 
python/cuml/dask/datasets/regression.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/python/cuml/dask/datasets/regression.py b/python/cuml/dask/datasets/regression.py index 61427c9249..a302232b23 100644 --- a/python/cuml/dask/datasets/regression.py +++ b/python/cuml/dask/datasets/regression.py @@ -20,8 +20,8 @@ import numpy as np import cupy as cp from cuml.utils import with_cupy_rmm -from cuml.dask.datasets.blobs import get_X -from cuml.dask.datasets.blobs import get_labels +from cuml.dask.datasets.blobs import _get_X +from cuml.dask.datasets.blobs import _get_labels from cuml.dask.common.input_utils import DistributedDataHandler @@ -131,9 +131,9 @@ def _f_order_shuffle(client, rs, X, y, chunksizes, n_features, workers=[w], pure=False) for idx, (w, part) in enumerate(data_ddh.gpu_futures)] - X_shuffled = [client.submit(get_X, f, pure=False) + X_shuffled = [client.submit(_get_X, f, pure=False) for idx, f in enumerate(shuffled)] - y_shuffled = [client.submit(get_labels, f, pure=False) + y_shuffled = [client.submit(_get_labels, f, pure=False) for idx, f in enumerate(shuffled)] X_dela = [_dask_array_from_delayed(Xs, chunksizes[idx], n_features, From 2b324bc9e2629f48089c56f912b8d32e5eb3b44b Mon Sep 17 00:00:00 2001 From: "Corey J. 
Nolet" Date: Sun, 26 Apr 2020 14:34:01 -0400 Subject: [PATCH 310/330] A couple small optimizations to UMAP fuzzy simplicial set --- cpp/src/umap/fuzzy_simpl_set/naive.h | 122 ++++++++++++++++----------- 1 file changed, 75 insertions(+), 47 deletions(-) diff --git a/cpp/src/umap/fuzzy_simpl_set/naive.h b/cpp/src/umap/fuzzy_simpl_set/naive.h index 07d190801a..99b13d9c9f 100644 --- a/cpp/src/umap/fuzzy_simpl_set/naive.h +++ b/cpp/src/umap/fuzzy_simpl_set/naive.h @@ -168,26 +168,6 @@ __global__ void smooth_knn_dist_kernel( } } -/** - * Construct the membership strength data for the 1-skeleton of each local - * fuzzy simplicial set -- this is formed as a sparse matrix (COO) where each - * row is a local fuzzy simplicial set, with a membership strength for the - * 1-simplex to each other data point. - * - * TODO: Optimize for coalesced reads (use col-major inputs). - * - * @param knn_indices: the knn index matrix of size (n, k) - * @param knn_dists: the knn distance matrix of size (n, k) - * @param sigmas: array of size n representing distance to kth nearest neighbor - * @param rhos: array of size n representing distance to the first nearest neighbor - * @param vals: T array of size n*k - * @param rows: int64_t array of size n - * @param cols: int64_t array of size k - * @param n Number of samples (rows in knn indices/distances) - * @param n_neighbors number of columns in knn indices/distances - * - * Descriptions adapted from: https://github.com/lmcinnes/umap/blob/master/umap/umap_.py - */ template __global__ void compute_membership_strength_kernel( const int64_t *knn_indices, @@ -231,6 +211,67 @@ __global__ void compute_membership_strength_kernel( } } +/** + * Construct the membership strength data for the 1-skeleton of each local + * fuzzy simplicial set -- this is formed as a sparse matrix (COO) where each + * row is a local fuzzy simplicial set, with a membership strength for the + * 1-simplex to each other data point. 
+ * + * TODO: Optimize for coalesced reads (use col-major inputs). + * + * @param knn_indices: the knn index matrix of size (n, k) + * @param knn_dists: the knn distance matrix of size (n, k) + * @param sigmas: array of size n representing distance to kth nearest neighbor + * @param rhos: array of size n representing distance to the first nearest neighbor + * @param vals: T array of size n*k + * @param rows: int64_t array of size n + * @param cols: int64_t array of size k + * @param n Number of samples (rows in knn indices/distances) + * @param n_neighbors number of columns in knn indices/distances + * + * Descriptions adapted from: https://github.com/lmcinnes/umap/blob/master/umap/umap_.py + */ +template +__global__ void compute_membership_strength_kernel2( + const int64_t *knn_indices, + const float *knn_dists, // nn outputs + const T *sigmas, const T *rhos, // continuous dists to nearest neighbors + T *vals, int *rows, int *cols, // result coo + int n, int n_neighbors) { // model params + + // row-based matrix is best + int idx = (blockIdx.x * TPB_X) + threadIdx.x; + + if (idx < n * n_neighbors) { + + int row = idx / n_neighbors; // one neighbor per thread + + double cur_rho = rhos[row]; + double cur_sigma = sigmas[row]; + + int64_t cur_knn_ind = knn_indices[idx]; + double cur_knn_dist = knn_dists[idx]; + + if (cur_knn_ind != -1) { + double val = 0.0; + if (cur_knn_ind == row) + val = 0.0; + else if (cur_knn_dist - cur_rho <= 0.0 or cur_sigma == 0.0) + val = 1.0; + else { + val = exp( + -((cur_knn_dist - cur_rho) / (cur_sigma))); + + if (val < MIN_FLOAT) val = MIN_FLOAT; + } + + rows[idx] = row; + cols[idx] = cur_knn_ind; + vals[idx] = val; + } + } +} + /* * Sets up and runs the knn dist smoothing */ @@ -240,36 +281,24 @@ void smooth_knn_dist(int n, const int64_t *knn_indices, const float *knn_dists, float local_connectivity, std::shared_ptr d_alloc, cudaStream_t stream) { - int blks = MLCommon::ceildiv(n, TPB_X); - dim3 grid(blks, 1, 1); + dim3 
grid(MLCommon::ceildiv(n, TPB_X), 1, 1); dim3 blk(TPB_X, 1, 1); MLCommon::device_buffer dist_means_dev(d_alloc, stream, n_neighbors); - MLCommon::Stats::mean(dist_means_dev.data(), knn_dists, n_neighbors, n, false, + MLCommon::Stats::mean(dist_means_dev.data(), knn_dists, 1, n_neighbors*n, false, false, stream); CUDA_CHECK(cudaPeekAtLastError()); - T *dist_means_host = (T *)malloc(n_neighbors * sizeof(T)); - MLCommon::updateHost(dist_means_host, dist_means_dev.data(), n_neighbors, + T mean_dist = 0.0; + MLCommon::updateHost(&mean_dist, dist_means_dev.data(), 1, stream); - CUDA_CHECK(cudaStreamSynchronize(stream)); - float sum = 0.0; - for (int i = 0; i < n_neighbors; i++) sum += dist_means_host[i]; - - T mean_dist = sum / float(n_neighbors); - - /** - * Clean up memory for subsequent algorithms - */ - free(dist_means_host); - /** - * Smooth kNN distances to be continuous - */ + * Smooth kNN distances to be continuous + */ smooth_knn_dist_kernel<<>>( knn_dists, n, mean_dist, sigmas, rhos, n_neighbors, local_connectivity); CUDA_CHECK(cudaPeekAtLastError()); @@ -297,18 +326,10 @@ void launcher(int n, const int64_t *knn_indices, const float *knn_dists, int n_neighbors, MLCommon::Sparse::COO *out, UMAPParams *params, std::shared_ptr d_alloc, cudaStream_t stream) { - /** - * All of the kernels in this algorithm are row-based and - * upper-bounded by k. Prefer 1-row per thread, scheduled - * as a single dimension. 
- */ - dim3 grid(MLCommon::ceildiv(n, TPB_X), 1, 1); - dim3 blk(TPB_X, 1, 1); /** * Calculate mean distance through a parallel reduction */ - MLCommon::device_buffer sigmas(d_alloc, stream, n); MLCommon::device_buffer rhos(d_alloc, stream, n); CUDA_CHECK(cudaMemsetAsync(sigmas.data(), 0, n * sizeof(T), stream)); @@ -319,6 +340,9 @@ void launcher(int n, const int64_t *knn_indices, const float *knn_dists, params->local_connectivity, d_alloc, stream); MLCommon::Sparse::COO in(d_alloc, stream, n * n_neighbors, n, n); + CUDA_CHECK(cudaMemsetAsync(in.rows(), 0, n * n_neighbors * sizeof(int), stream)); + CUDA_CHECK(cudaMemsetAsync(in.cols(), 0, n * n_neighbors * sizeof(int), stream)); + CUDA_CHECK(cudaMemsetAsync(in.vals(), 0, n * n_neighbors * sizeof(T), stream)); // check for logging in order to avoid the potentially costly `arr2Str` call! if (ML::Logger::get().shouldLogFor(CUML_LEVEL_DEBUG)) { @@ -334,7 +358,11 @@ void launcher(int n, const int64_t *knn_indices, const float *knn_dists, /** * Compute graph of membership strengths */ - compute_membership_strength_kernel<<>>( + + dim3 grid_elm(MLCommon::ceildiv(n*n_neighbors, TPB_X), 1, 1); + dim3 blk_elm(TPB_X, 1, 1); + + compute_membership_strength_kernel2<<>>( knn_indices, knn_dists, sigmas.data(), rhos.data(), in.vals(), in.rows(), in.cols(), in.n_rows, n_neighbors); CUDA_CHECK(cudaPeekAtLastError()); From ca8a62159e872f204095efd431611d693241e090 Mon Sep 17 00:00:00 2001 From: "Corey J. 
Nolet" Date: Sun, 26 Apr 2020 15:01:46 -0400 Subject: [PATCH 311/330] Removing old membership strength impl --- cpp/src/umap/fuzzy_simpl_set/naive.h | 53 ++-------------------------- cpp/src/umap/runner.h | 2 +- 2 files changed, 4 insertions(+), 51 deletions(-) diff --git a/cpp/src/umap/fuzzy_simpl_set/naive.h b/cpp/src/umap/fuzzy_simpl_set/naive.h index 99b13d9c9f..67b72d676e 100644 --- a/cpp/src/umap/fuzzy_simpl_set/naive.h +++ b/cpp/src/umap/fuzzy_simpl_set/naive.h @@ -168,48 +168,6 @@ __global__ void smooth_knn_dist_kernel( } } -template -__global__ void compute_membership_strength_kernel( - const int64_t *knn_indices, - const float *knn_dists, // nn outputs - const T *sigmas, const T *rhos, // continuous dists to nearest neighbors - T *vals, int *rows, int *cols, // result coo - int n, int n_neighbors) { // model params - - // row-based matrix is best - int row = (blockIdx.x * TPB_X) + threadIdx.x; - int i = row * n_neighbors; // one row per thread - - if (row < n) { - T cur_rho = rhos[row]; - T cur_sigma = sigmas[row]; - - for (int j = 0; j < n_neighbors; j++) { - int idx = i + j; - - int64_t cur_knn_ind = knn_indices[idx]; - T cur_knn_dist = knn_dists[idx]; - - if (cur_knn_ind == -1) continue; - - double val = 0.0; - if (cur_knn_ind == row) - val = 0.0; - else if (cur_knn_dist - cur_rho <= 0.0) - val = 1.0; - else { - val = exp( - -((double(cur_knn_dist) - double(cur_rho)) / (double(cur_sigma)))); - - if (val < MIN_FLOAT) val = MIN_FLOAT; - } - - rows[idx] = row; - cols[idx] = cur_knn_ind; - vals[idx] = val; - } - } -} /** * Construct the membership strength data for the 1-skeleton of each local @@ -217,8 +175,6 @@ __global__ void compute_membership_strength_kernel( * row is a local fuzzy simplicial set, with a membership strength for the * 1-simplex to each other data point. * - * TODO: Optimize for coalesced reads (use col-major inputs). 
- * * @param knn_indices: the knn index matrix of size (n, k) * @param knn_dists: the knn distance matrix of size (n, k) * @param sigmas: array of size n representing distance to kth nearest neighbor @@ -232,7 +188,7 @@ __global__ void compute_membership_strength_kernel( * Descriptions adapted from: https://github.com/lmcinnes/umap/blob/master/umap/umap_.py */ template -__global__ void compute_membership_strength_kernel2( +__global__ void compute_membership_strength_kernel( const int64_t *knn_indices, const float *knn_dists, // nn outputs const T *sigmas, const T *rhos, // continuous dists to nearest neighbors @@ -256,7 +212,7 @@ __global__ void compute_membership_strength_kernel2( double val = 0.0; if (cur_knn_ind == row) val = 0.0; - else if (cur_knn_dist - cur_rho <= 0.0 or cur_sigma == 0.0) + else if (cur_knn_dist - cur_rho <= 0.0 || cur_sigma == 0.0) val = 1.0; else { val = exp( @@ -340,9 +296,6 @@ void launcher(int n, const int64_t *knn_indices, const float *knn_dists, params->local_connectivity, d_alloc, stream); MLCommon::Sparse::COO in(d_alloc, stream, n * n_neighbors, n, n); - CUDA_CHECK(cudaMemsetAsync(in.rows(), 0, n * n_neighbors * sizeof(int), stream)); - CUDA_CHECK(cudaMemsetAsync(in.cols(), 0, n * n_neighbors * sizeof(int), stream)); - CUDA_CHECK(cudaMemsetAsync(in.vals(), 0, n * n_neighbors * sizeof(T), stream)); // check for logging in order to avoid the potentially costly `arr2Str` call! 
if (ML::Logger::get().shouldLogFor(CUML_LEVEL_DEBUG)) { @@ -362,7 +315,7 @@ void launcher(int n, const int64_t *knn_indices, const float *knn_dists, dim3 grid_elm(MLCommon::ceildiv(n*n_neighbors, TPB_X), 1, 1); dim3 blk_elm(TPB_X, 1, 1); - compute_membership_strength_kernel2<<>>( + compute_membership_strength_kernel<<>>( knn_indices, knn_dists, sigmas.data(), rhos.data(), in.vals(), in.rows(), in.cols(), in.n_rows, n_neighbors); CUDA_CHECK(cudaPeekAtLastError()); diff --git a/cpp/src/umap/runner.h b/cpp/src/umap/runner.h index 95a31c4aab..89f2cad097 100644 --- a/cpp/src/umap/runner.h +++ b/cpp/src/umap/runner.h @@ -338,7 +338,7 @@ void _transform(const cumlHandle &handle, T *X, int n, int d, COO graph_coo(d_alloc, stream, nnz, n, n); FuzzySimplSetImpl::compute_membership_strength_kernel - <<>>(knn_indices, knn_dists, sigmas.data(), + <<>>(knn_indices, knn_dists, sigmas.data(), rhos.data(), graph_coo.vals(), graph_coo.rows(), graph_coo.cols(), graph_coo.n_rows, params->n_neighbors); From 3344851e8b9ce7a30d1b7ddf22f16f34f4e4cbe8 Mon Sep 17 00:00:00 2001 From: chaithyagr Date: Sun, 26 Apr 2020 21:18:11 +0200 Subject: [PATCH 312/330] Add cuda_utils.h to qn_util.h --- cpp/src/glm/qn/qn_util.cuh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/glm/qn/qn_util.cuh b/cpp/src/glm/qn/qn_util.cuh index b0800a18bb..6e2cc18b83 100644 --- a/cpp/src/glm/qn/qn_util.cuh +++ b/cpp/src/glm/qn/qn_util.cuh @@ -15,8 +15,8 @@ */ #pragma once - #include +#include "cuda_utils.h" namespace ML { namespace GLM { From 30f86d082fdc5ab9f39af2a6c65af8e0f0a7d623 Mon Sep 17 00:00:00 2001 From: "Corey J. 
Nolet" Date: Sun, 26 Apr 2020 15:25:19 -0400 Subject: [PATCH 313/330] Updating c++ style --- cpp/src/umap/runner.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/umap/runner.h b/cpp/src/umap/runner.h index 89f2cad097..fee00357db 100644 --- a/cpp/src/umap/runner.h +++ b/cpp/src/umap/runner.h @@ -426,7 +426,7 @@ void _transform(const cumlHandle &handle, T *X, int n, int d, params->callback->on_preprocess_end(transformed); } - params->initial_alpha /= 4.0; + params->initial_alpha /= 4.0; // TODO: This value should be passed into "optimize layout" directly to avoid side-effects. SimplSetEmbedImpl::optimize_layout( transformed, n, embedding, embedding_n, comp_coo.rows(), comp_coo.cols(), From 3e4d83ba4506ae8cf530d7264444313f6dae9b59 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Sun, 26 Apr 2020 19:04:09 -0400 Subject: [PATCH 314/330] Updating tests --- python/cuml/solvers/sgd.pyx | 4 ++-- python/cuml/test/test_mbsgd_classifier.py | 4 ++-- python/cuml/test/test_mbsgd_regressor.py | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/python/cuml/solvers/sgd.pyx b/python/cuml/solvers/sgd.pyx index 8b4620feef..f8e5351ee6 100644 --- a/python/cuml/solvers/sgd.pyx +++ b/python/cuml/solvers/sgd.pyx @@ -453,7 +453,7 @@ class SGD(Base): del(X_m) - output_type = self._get_output_type(self.output_type) + output_type = self._get_output_type(X) return preds.to_output(output_type) @@ -514,6 +514,6 @@ class SGD(Base): del(X_m) - output_type = self._get_output_type(self.output_type) + output_type = self._get_output_type(X) return preds.to_output(output_type) diff --git a/python/cuml/test/test_mbsgd_classifier.py b/python/cuml/test/test_mbsgd_classifier.py index f7e7680b51..0f5fe0c390 100644 --- a/python/cuml/test/test_mbsgd_classifier.py +++ b/python/cuml/test/test_mbsgd_classifier.py @@ -68,7 +68,7 @@ def test_mbsgd_classifier(lrate, penalty, loss, make_dataset): penalty=penalty) cu_mbsgd_classifier.fit(X_train, y_train) - cu_pred = 
cu_mbsgd_classifier.predict(X_test).get() + cu_pred = cu_mbsgd_classifier.predict(X_test) cu_acc = accuracy_score(cu_pred, y_test) if nrows < 500000: @@ -89,7 +89,7 @@ def test_mbsgd_classifier_default(make_dataset): cu_mbsgd_classifier = cumlMBSGClassifier() cu_mbsgd_classifier.fit(X_train, y_train) - cu_pred = cu_mbsgd_classifier.predict(X_test).get() + cu_pred = cu_mbsgd_classifier.predict(X_test) cu_acc = accuracy_score(cu_pred, y_test) if nrows < 500000: diff --git a/python/cuml/test/test_mbsgd_regressor.py b/python/cuml/test/test_mbsgd_regressor.py index c21279fff2..4b48d4ad27 100644 --- a/python/cuml/test/test_mbsgd_regressor.py +++ b/python/cuml/test/test_mbsgd_regressor.py @@ -65,7 +65,7 @@ def test_mbsgd_regressor(lrate, penalty, make_dataset): penalty=penalty) cu_mbsgd_regressor.fit(X_train, y_train) - cu_pred = cu_mbsgd_regressor.predict(X_test).get() + cu_pred = cu_mbsgd_regressor.predict(X_test) cu_r2 = r2_score(cu_pred, y_test, convert_dtype=datatype) if nrows < 500000: @@ -85,7 +85,7 @@ def test_mbsgd_regressor_default(make_dataset): cu_mbsgd_regressor = cumlMBSGRegressor() cu_mbsgd_regressor.fit(X_train, y_train) - cu_pred = cu_mbsgd_regressor.predict(X_test).get() + cu_pred = cu_mbsgd_regressor.predict(X_test) cu_r2 = r2_score(cu_pred, y_test, convert_dtype=datatype) if nrows < 500000: From e482e078466d3826131b7c3f2db0ec4822ea46f3 Mon Sep 17 00:00:00 2001 From: "Corey J. 
Nolet" Date: Sun, 26 Apr 2020 22:21:14 -0400 Subject: [PATCH 315/330] Updating sgd tests to actually assert correctness of output --- python/cuml/test/test_sgd.py | 113 +++++++++++++++++++++++------------ 1 file changed, 76 insertions(+), 37 deletions(-) diff --git a/python/cuml/test/test_sgd.py b/python/cuml/test/test_sgd.py index 980e1f780b..2089a1de50 100644 --- a/python/cuml/test/test_sgd.py +++ b/python/cuml/test/test_sgd.py @@ -16,63 +16,102 @@ import numpy as np import pytest +import cudf + from cuml.solvers import SGD as cumlSGD -from cuml.test.utils import unit_param, quality_param, \ - stress_param from sklearn.datasets.samples_generator import make_blobs from sklearn.model_selection import train_test_split -from sklearn import datasets @pytest.mark.parametrize('lrate', ['constant', 'invscaling', 'adaptive']) -@pytest.mark.parametrize('datatype', [np.float32, np.float64]) +@pytest.mark.parametrize('dtype', [np.float32, np.float64]) @pytest.mark.parametrize('penalty', ['none', 'l1', 'l2', 'elasticnet']) @pytest.mark.parametrize('loss', ['hinge', 'log', 'squared_loss']) -@pytest.mark.parametrize('name', [unit_param(None), quality_param('iris'), - stress_param('blobs')]) -def test_svd(datatype, lrate, penalty, loss, name): - - if name == 'blobs': - X, y = make_blobs(n_samples=500000, - n_features=1000, random_state=0) - X = X.astype(datatype) - y = y.astype(datatype) - X_train, X_test, y_train, y_test = train_test_split(X, y, - train_size=0.8) - - elif name == 'iris': - iris = datasets.load_iris() - X = (iris.data).astype(datatype) - y = (iris.target).astype(datatype) - X_train, X_test, y_train, y_test = train_test_split(X, y, - train_size=0.8) +@pytest.mark.parametrize('datatype', ["dataframe", "numpy"]) +def test_sgd(dtype, lrate, penalty, loss, datatype): - else: - X_train = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]], - dtype=datatype) - y_train = np.array([1, 1, 2, 2], dtype=datatype) - X_test = np.array([[3.0, 5.0], [2.0, 5.0]]).astype(datatype) + X, 
y = make_blobs(n_samples=100, + n_features=3, + centers=2, + random_state=0) + X = X.astype(dtype) + y = y.astype(dtype) + + if loss == "hinge" or loss == "squared_loss": + y[y == 0] = -1 + + X_train, X_test, y_train, y_test = train_test_split(X, y, + train_size=0.8) + + if datatype is "dataframe": + X_train = cudf.DataFrame.from_gpu_matrix(X_train) + X_test = cudf.DataFrame.from_gpu_matrix(X_test) + y_train = cudf.Series(y_train) cu_sgd = cumlSGD(learning_rate=lrate, eta0=0.005, epochs=2000, fit_intercept=True, batch_size=4096, tol=0.0, penalty=penalty, loss=loss) cu_sgd.fit(X_train, y_train) - cu_pred = cu_sgd.predict(X_test).to_array() - print("cuML predictions : ", cu_pred) + cu_pred = cu_sgd.predict(X_test) + + if datatype is "dataframe": + assert isinstance(cu_pred, cudf.Series) + cu_pred = cu_pred.to_array() + + else: + assert isinstance(cu_pred, np.ndarray) + + if loss == "log": + cu_pred[cu_pred < 0.5] = 0 + cu_pred[cu_pred >= 0.5] = 1 + elif loss == "squared_loss": + cu_pred[cu_pred < 0] = -1 + cu_pred[cu_pred >= 0] = 1 + # Adjust for squared loss (we don't need to test for high accuracy, + # just that the loss function tended towards the expected classes. 
+ assert np.array_equal(cu_pred, y_test) -@pytest.mark.parametrize('datatype', [np.float32, np.float64]) -def test_svd_default(datatype): - X_train = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]], - dtype=datatype) - y_train = np.array([1, 1, 2, 2], dtype=datatype) - X_test = np.array([[3.0, 5.0], [2.0, 5.0]]).astype(datatype) +@pytest.mark.parametrize('dtype', [np.float32, np.float64]) +@pytest.mark.parametrize('datatype', ["dataframe", "numpy"]) +def test_sgd_default(dtype, datatype): + + X, y = make_blobs(n_samples=100, + n_features=3, + centers=2, + random_state=0) + X = X.astype(dtype) + y = y.astype(dtype) + + # Default loss is squared_loss + y[y == 0] = -1 + + X_train, X_test, y_train, y_test = train_test_split(X, y, + train_size=0.8) + + if datatype is "dataframe": + X_train = cudf.DataFrame.from_gpu_matrix(X_train) + X_test = cudf.DataFrame.from_gpu_matrix(X_test) + y_train = cudf.Series(y_train) cu_sgd = cumlSGD() cu_sgd.fit(X_train, y_train) - cu_pred = cu_sgd.predict(X_test).to_array() - print("cuML predictions : ", cu_pred) + cu_pred = cu_sgd.predict(X_test) + + if datatype is "dataframe": + assert isinstance(cu_pred, cudf.Series) + cu_pred = cu_pred.to_array() + + else: + assert isinstance(cu_pred, np.ndarray) + + # Adjust for squared loss (we don't need to test for high accuracy, + # just that the loss function tended towards the expected classes. + cu_pred[cu_pred < 0] = -1 + cu_pred[cu_pred >= 0] = 1 + + assert np.array_equal(cu_pred, y_test) From 5c958e19c9bb396d324215219b39558f16af669d Mon Sep 17 00:00:00 2001 From: "Corey J. 
Nolet" Date: Sun, 26 Apr 2020 22:22:52 -0400 Subject: [PATCH 316/330] Fixing style --- python/cuml/test/test_sgd.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/python/cuml/test/test_sgd.py b/python/cuml/test/test_sgd.py index 2089a1de50..df1ad1bd4e 100644 --- a/python/cuml/test/test_sgd.py +++ b/python/cuml/test/test_sgd.py @@ -44,7 +44,7 @@ def test_sgd(dtype, lrate, penalty, loss, datatype): X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8) - if datatype is "dataframe": + if datatype == "dataframe": X_train = cudf.DataFrame.from_gpu_matrix(X_train) X_test = cudf.DataFrame.from_gpu_matrix(X_test) y_train = cudf.Series(y_train) @@ -56,7 +56,7 @@ def test_sgd(dtype, lrate, penalty, loss, datatype): cu_sgd.fit(X_train, y_train) cu_pred = cu_sgd.predict(X_test) - if datatype is "dataframe": + if datatype == "dataframe": assert isinstance(cu_pred, cudf.Series) cu_pred = cu_pred.to_array() @@ -92,7 +92,7 @@ def test_sgd_default(dtype, datatype): X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8) - if datatype is "dataframe": + if datatype == "dataframe": X_train = cudf.DataFrame.from_gpu_matrix(X_train) X_test = cudf.DataFrame.from_gpu_matrix(X_test) y_train = cudf.Series(y_train) @@ -102,7 +102,7 @@ def test_sgd_default(dtype, datatype): cu_sgd.fit(X_train, y_train) cu_pred = cu_sgd.predict(X_test) - if datatype is "dataframe": + if datatype == "dataframe": assert isinstance(cu_pred, cudf.Series) cu_pred = cu_pred.to_array() From cd738b421cf99016fb7a82e3775c5a5619807296 Mon Sep 17 00:00:00 2001 From: "Corey J. 
Nolet" Date: Sun, 26 Apr 2020 22:28:25 -0400 Subject: [PATCH 317/330] Updating changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index b41fc16738..fb3196055b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -39,6 +39,7 @@ - PR #2090: Upgrade C++ build to C++14 standard - PR #2089: CI: enabled cuda-memcheck on ml-prims unit-tests during nightly build - PR #1883: Use CumlArray in ARIMA +- PR #2135: A few optimizations to UMAP fuzzy simplicial set ## Bug Fixes - PR #1939: Fix syntax error in cuml.common.array From 67aee5f9c7644a61e11b58800f3016a5614655c6 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Sun, 26 Apr 2020 22:55:35 -0400 Subject: [PATCH 318/330] Fixing c++ style --- cpp/src/umap/fuzzy_simpl_set/naive.h | 44 ++++++++++++---------------- cpp/src/umap/runner.h | 9 +++--- 2 files changed, 24 insertions(+), 29 deletions(-) diff --git a/cpp/src/umap/fuzzy_simpl_set/naive.h b/cpp/src/umap/fuzzy_simpl_set/naive.h index 67b72d676e..50b6b5021c 100644 --- a/cpp/src/umap/fuzzy_simpl_set/naive.h +++ b/cpp/src/umap/fuzzy_simpl_set/naive.h @@ -168,7 +168,6 @@ __global__ void smooth_knn_dist_kernel( } } - /** * Construct the membership strength data for the 1-skeleton of each local * fuzzy simplicial set -- this is formed as a sparse matrix (COO) where each @@ -199,31 +198,29 @@ __global__ void compute_membership_strength_kernel( int idx = (blockIdx.x * TPB_X) + threadIdx.x; if (idx < n * n_neighbors) { + int row = idx / n_neighbors; // one neighbor per thread - int row = idx / n_neighbors; // one neighbor per thread - - double cur_rho = rhos[row]; + double cur_rho = rhos[row]; double cur_sigma = sigmas[row]; int64_t cur_knn_ind = knn_indices[idx]; double cur_knn_dist = knn_dists[idx]; if (cur_knn_ind != -1) { - double val = 0.0; - if (cur_knn_ind == row) - val = 0.0; - else if (cur_knn_dist - cur_rho <= 0.0 || cur_sigma == 0.0) - val = 1.0; - else { - val = exp( - -((cur_knn_dist - cur_rho) / (cur_sigma))); - - 
if (val < MIN_FLOAT) val = MIN_FLOAT; - } + double val = 0.0; + if (cur_knn_ind == row) + val = 0.0; + else if (cur_knn_dist - cur_rho <= 0.0 || cur_sigma == 0.0) + val = 1.0; + else { + val = exp(-((cur_knn_dist - cur_rho) / (cur_sigma))); + + if (val < MIN_FLOAT) val = MIN_FLOAT; + } - rows[idx] = row; - cols[idx] = cur_knn_ind; - vals[idx] = val; + rows[idx] = row; + cols[idx] = cur_knn_ind; + vals[idx] = val; } } } @@ -237,19 +234,17 @@ void smooth_knn_dist(int n, const int64_t *knn_indices, const float *knn_dists, float local_connectivity, std::shared_ptr d_alloc, cudaStream_t stream) { - dim3 grid(MLCommon::ceildiv(n, TPB_X), 1, 1); dim3 blk(TPB_X, 1, 1); MLCommon::device_buffer dist_means_dev(d_alloc, stream, n_neighbors); - MLCommon::Stats::mean(dist_means_dev.data(), knn_dists, 1, n_neighbors*n, false, - false, stream); + MLCommon::Stats::mean(dist_means_dev.data(), knn_dists, 1, n_neighbors * n, + false, false, stream); CUDA_CHECK(cudaPeekAtLastError()); T mean_dist = 0.0; - MLCommon::updateHost(&mean_dist, dist_means_dev.data(), 1, - stream); + MLCommon::updateHost(&mean_dist, dist_means_dev.data(), 1, stream); CUDA_CHECK(cudaStreamSynchronize(stream)); /** @@ -282,7 +277,6 @@ void launcher(int n, const int64_t *knn_indices, const float *knn_dists, int n_neighbors, MLCommon::Sparse::COO *out, UMAPParams *params, std::shared_ptr d_alloc, cudaStream_t stream) { - /** * Calculate mean distance through a parallel reduction */ @@ -312,7 +306,7 @@ void launcher(int n, const int64_t *knn_indices, const float *knn_dists, * Compute graph of membership strengths */ - dim3 grid_elm(MLCommon::ceildiv(n*n_neighbors, TPB_X), 1, 1); + dim3 grid_elm(MLCommon::ceildiv(n * n_neighbors, TPB_X), 1, 1); dim3 blk_elm(TPB_X, 1, 1); compute_membership_strength_kernel<<>>( diff --git a/cpp/src/umap/runner.h b/cpp/src/umap/runner.h index fee00357db..cfb8557ae4 100644 --- a/cpp/src/umap/runner.h +++ b/cpp/src/umap/runner.h @@ -339,9 +339,9 @@ void _transform(const cumlHandle 
&handle, T *X, int n, int d, FuzzySimplSetImpl::compute_membership_strength_kernel <<>>(knn_indices, knn_dists, sigmas.data(), - rhos.data(), graph_coo.vals(), - graph_coo.rows(), graph_coo.cols(), - graph_coo.n_rows, params->n_neighbors); + rhos.data(), graph_coo.vals(), + graph_coo.rows(), graph_coo.cols(), + graph_coo.n_rows, params->n_neighbors); CUDA_CHECK(cudaPeekAtLastError()); if (knn_indices_b) delete knn_indices_b; @@ -426,7 +426,8 @@ void _transform(const cumlHandle &handle, T *X, int n, int d, params->callback->on_preprocess_end(transformed); } - params->initial_alpha /= 4.0; // TODO: This value should be passed into "optimize layout" directly to avoid side-effects. + params->initial_alpha /= + 4.0; // TODO: This value should be passed into "optimize layout" directly to avoid side-effects. SimplSetEmbedImpl::optimize_layout( transformed, n, embedding, embedding_n, comp_coo.rows(), comp_coo.cols(), From 2b2a1b18bd2fdcb0cd72a1ab934b5ca4d3f3973f Mon Sep 17 00:00:00 2001 From: Thejaswi Rao Date: Mon, 27 Apr 2020 01:52:36 -0700 Subject: [PATCH 319/330] FIX updated the order of argument passing to Base class --- python/cuml/cluster/dbscan.pyx | 1 + python/cuml/common/base.pyx | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/python/cuml/cluster/dbscan.pyx b/python/cuml/cluster/dbscan.pyx index e119c02ebe..a264877756 100644 --- a/python/cuml/cluster/dbscan.pyx +++ b/python/cuml/cluster/dbscan.pyx @@ -225,6 +225,7 @@ class DBSCAN(Base): self._labels_ = CumlArray.empty(n_rows, dtype=out_dtype) cdef uintptr_t labels_ptr = self._labels_.ptr + print("type=%s val=%s" % (type(self.verbosity).__name__, self.verbosity)) if self.dtype == np.float32: if out_dtype is "int32" or out_dtype is np.int32: dbscanFit(handle_[0], diff --git a/python/cuml/common/base.pyx b/python/cuml/common/base.pyx index 3ca408dcd4..2559e22406 100644 --- a/python/cuml/common/base.pyx +++ b/python/cuml/common/base.pyx @@ -163,8 +163,8 @@ class Base: del base # optional! 
""" - def __init__(self, handle=None, verbose=False, verbosity=logger.LEVEL_INFO, - output_type=None): + def __init__(self, handle=None, verbose=False, output_type=None, + verbosity=logger.LEVEL_INFO): """ Constructor. All children must call init method of this base class. From 63acbd7a9b61a93e6d085a7ea1a34cfe21c26d11 Mon Sep 17 00:00:00 2001 From: Thejaswi Rao Date: Mon, 27 Apr 2020 01:53:24 -0700 Subject: [PATCH 320/330] FIX removed accidental commit of a debug print statement in dbscan class --- python/cuml/cluster/dbscan.pyx | 1 - 1 file changed, 1 deletion(-) diff --git a/python/cuml/cluster/dbscan.pyx b/python/cuml/cluster/dbscan.pyx index a264877756..e119c02ebe 100644 --- a/python/cuml/cluster/dbscan.pyx +++ b/python/cuml/cluster/dbscan.pyx @@ -225,7 +225,6 @@ class DBSCAN(Base): self._labels_ = CumlArray.empty(n_rows, dtype=out_dtype) cdef uintptr_t labels_ptr = self._labels_.ptr - print("type=%s val=%s" % (type(self.verbosity).__name__, self.verbosity)) if self.dtype == np.float32: if out_dtype is "int32" or out_dtype is np.int32: dbscanFit(handle_[0], From d57c649e1a0bce57c877e5933eabb4453116ee8f Mon Sep 17 00:00:00 2001 From: Hyunsu Cho Date: Mon, 27 Apr 2020 12:45:19 -0700 Subject: [PATCH 321/330] Remove GPU arch < 60 from CMake build --- cpp/cmake/EvalGpuArchs.cmake | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/cpp/cmake/EvalGpuArchs.cmake b/cpp/cmake/EvalGpuArchs.cmake index aa4002438d..f3918542db 100644 --- a/cpp/cmake/EvalGpuArchs.cmake +++ b/cpp/cmake/EvalGpuArchs.cmake @@ -54,6 +54,15 @@ int main(int argc, char** argv) { ${eval_file} OUTPUT_VARIABLE __gpu_archs OUTPUT_STRIP_TRAILING_WHITESPACE) - message("Auto detection of gpu-archs: ${__gpu_archs}") - set(${gpu_archs} ${__gpu_archs} PARENT_SCOPE) + set(__gpu_archs_filtered "${__gpu_archs}") + foreach(arch ${__gpu_archs}) + if (arch VERSION_LESS 60) + list(REMOVE_ITEM __gpu_archs_filtered ${arch}) + endif() + endforeach() + if (NOT __gpu_archs_filtered) + 
message(FATAL_ERROR "No supported GPU arch found on this system") + endif() + message("Auto detection of gpu-archs: ${__gpu_archs_filtered}") + set(${gpu_archs} ${__gpu_archs_filtered} PARENT_SCOPE) endfunction(evaluate_gpu_archs) From b649fc26eb1d60e401e75f5d506d338b2bcde663 Mon Sep 17 00:00:00 2001 From: Hyunsu Cho Date: Mon, 27 Apr 2020 12:50:39 -0700 Subject: [PATCH 322/330] Changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5c8e26ca17..63683ede2a 100755 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -68,6 +68,7 @@ - PR #2107: Fix PCA transform - PR #2109: input_to_cuml_array __cuda_array_interface__ bugfix - PR #2117: cuDF __array__ exception small fixes +- PR #2144: Remove GPU arch < 60 from CMake build # cuML 0.13.0 (Date TBD) From 29a5e78c83ad02e7beca15187d63948b60e55a75 Mon Sep 17 00:00:00 2001 From: Simon Andersen Date: Mon, 27 Apr 2020 15:01:59 -0500 Subject: [PATCH 323/330] Use self._features when possible and check shape mismatch --- python/cuml/preprocessing/encoders.py | 11 +++++++---- python/cuml/test/test_one_hot_encoder.py | 12 ++++++++++++ 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/python/cuml/preprocessing/encoders.py b/python/cuml/preprocessing/encoders.py index ce1e20992c..45c4901c3b 100644 --- a/python/cuml/preprocessing/encoders.py +++ b/python/cuml/preprocessing/encoders.py @@ -192,20 +192,23 @@ def fit(self, X): self._encoders = { feature: LabelEncoder(handle_unknown=self.handle_unknown).fit( X[feature]) - for feature in X.columns + for feature in self._features } else: self.categories = self._check_input(self.categories, True) self._features = self.categories.columns + if self._features.shape[0] != X.shape[0]: + raise ValueError("Shape mismatch: if categories is not 'auto'," + " it has to be of shape (n_features, _).") self._encoders = dict() - for feature in self.categories.columns: - le = LabelEncoder(handle_unknown=self.handle_unknown) - self._encoders[feature] = 
le.fit(self.categories[feature]) + for feature in self._features: if self.handle_unknown == 'error': if not X[feature].isin(self.categories[feature]).all(): msg = ("Found unknown categories in column {0}" " during fit".format(feature)) raise KeyError(msg) + le = LabelEncoder(handle_unknown=self.handle_unknown) + self._encoders[feature] = le.fit(self.categories[feature]) self.drop_idx_ = self._compute_drop_idx() self._fitted = True diff --git a/python/cuml/test/test_one_hot_encoder.py b/python/cuml/test/test_one_hot_encoder.py index 03098ea3d2..382f1ad037 100644 --- a/python/cuml/test/test_one_hot_encoder.py +++ b/python/cuml/test/test_one_hot_encoder.py @@ -287,3 +287,15 @@ def test_onehot_sparse_drop(as_array): ohe = enc.fit_transform(X) ref = sk_enc.fit_transform(ary) cp.testing.assert_array_equal(ohe.toarray(), ref.toarray()) + + +@pytest.mark.parametrize('as_array', [True, False], ids=['cupy', 'cudf']) +def test_onehot_categories_shape_mismatch(as_array): + X = DataFrame({'chars': ['a', 'b'], 'int': [0, 2]}) + categories = DataFrame({'chars': ['a', 'b', 'c']}) + if as_array: + X = _from_df_to_cupy(X) + categories = _from_df_to_cupy(categories).transpose() + + with pytest.raises(ValueError): + OneHotEncoder(categories=categories, sparse=False).fit(X) From 7422e03ee60f5b4a6d2c7ef4a50563281af99368 Mon Sep 17 00:00:00 2001 From: Raymond Douglass Date: Mon, 27 Apr 2020 16:22:08 -0400 Subject: [PATCH 324/330] Skip existing on conda uploads --- ci/cpu/cuml/upload-anaconda.sh | 2 +- ci/cpu/libcuml/upload-anaconda.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/cpu/cuml/upload-anaconda.sh b/ci/cpu/cuml/upload-anaconda.sh index 82c520341e..6a79b85919 100755 --- a/ci/cpu/cuml/upload-anaconda.sh +++ b/ci/cpu/cuml/upload-anaconda.sh @@ -29,5 +29,5 @@ if [ "$BUILD_CUML" == "1" ]; then echo "Upload" echo ${UPLOADFILE} - anaconda -t ${MY_UPLOAD_KEY} upload -u ${CONDA_USERNAME:-rapidsai} ${LABEL_OPTION} --force ${UPLOADFILE} + anaconda -t 
${MY_UPLOAD_KEY} upload -u ${CONDA_USERNAME:-rapidsai} ${LABEL_OPTION} --skip-existing ${UPLOADFILE} fi diff --git a/ci/cpu/libcuml/upload-anaconda.sh b/ci/cpu/libcuml/upload-anaconda.sh index 24dd056462..26634fe865 100644 --- a/ci/cpu/libcuml/upload-anaconda.sh +++ b/ci/cpu/libcuml/upload-anaconda.sh @@ -29,5 +29,5 @@ if [ "$BUILD_LIBCUML" == "1" ]; then echo "Upload" echo ${UPLOADFILE} - anaconda -t ${MY_UPLOAD_KEY} upload -u ${CONDA_USERNAME:-rapidsai} ${LABEL_OPTION} --force ${UPLOADFILE} + anaconda -t ${MY_UPLOAD_KEY} upload -u ${CONDA_USERNAME:-rapidsai} ${LABEL_OPTION} --skip-existing ${UPLOADFILE} fi From 20ee0ffca9585bf08cda89cb46376ca95c9e8a67 Mon Sep 17 00:00:00 2001 From: John Zedlewski Date: Mon, 27 Apr 2020 13:24:33 -0700 Subject: [PATCH 325/330] Remove old (deprecated) kalman filter --- cpp/CMakeLists.txt | 1 - cpp/src/kalman_filter/KalmanFilter.cuh | 24 -- cpp/src/kalman_filter/kf_variables.h | 60 --- cpp/src/kalman_filter/lkf.h | 362 ----------------- cpp/src/kalman_filter/lkf_py.cu | 134 ------- cpp/src/kalman_filter/lkf_py.h | 45 --- cpp/src/kalman_filter/utils.h | 120 ------ cpp/test/CMakeLists.txt | 1 - cpp/test/sg/lkf_test.cu | 213 ---------- python/cuml/__init__.py | 1 - python/cuml/filter/__init__.py | 17 - python/cuml/filter/kalman_filter.pyx | 513 ------------------------- python/cuml/test/test_kalman_filter.py | 116 ------ 13 files changed, 1607 deletions(-) delete mode 100644 cpp/src/kalman_filter/KalmanFilter.cuh delete mode 100644 cpp/src/kalman_filter/kf_variables.h delete mode 100644 cpp/src/kalman_filter/lkf.h delete mode 100644 cpp/src/kalman_filter/lkf_py.cu delete mode 100644 cpp/src/kalman_filter/lkf_py.h delete mode 100644 cpp/src/kalman_filter/utils.h delete mode 100644 cpp/test/sg/lkf_test.cu delete mode 100644 python/cuml/filter/__init__.py delete mode 100644 python/cuml/filter/kalman_filter.pyx delete mode 100644 python/cuml/test/test_kalman_filter.py diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 
174831d3dc..6a230330d5 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -275,7 +275,6 @@ if(BUILD_CUML_CPP_LIBRARY) src/fil/infer.cu src/glm/glm.cu src/holtwinters/holtwinters.cu - src/kalman_filter/lkf_py.cu src/kmeans/kmeans.cu src/knn/knn.cu src/metrics/metrics.cu diff --git a/cpp/src/kalman_filter/KalmanFilter.cuh b/cpp/src/kalman_filter/KalmanFilter.cuh deleted file mode 100644 index 41c61e7e04..0000000000 --- a/cpp/src/kalman_filter/KalmanFilter.cuh +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Copyright (c) 2018, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once -#include -#include -#include -#include -#include -#include "lkf.h" -#include "lkf_py.h" diff --git a/cpp/src/kalman_filter/kf_variables.h b/cpp/src/kalman_filter/kf_variables.h deleted file mode 100644 index 0da7da5879..0000000000 --- a/cpp/src/kalman_filter/kf_variables.h +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Copyright (c) 2018, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -namespace kf { -namespace linear { - -/** Solver options for LinearKF */ -enum Option { - /** long form of KF equation */ - LongForm = 0, - /** short (optimal) form of KF equation with explicit inverse finding */ - ShortFormExplicit, - /** short (optimal) form of KF equation with implicit inverse evaluation */ - ShortFormImplicit -}; - -template -struct Variables { - // these device pointers are NOT owned by this class - T *x_est, *x_up, *Phi; // predict_x - T *P_est, *P_up, *Q; // predict_P - T *R, *H; // kalman gain - T *z; // update_x - bool initialized = false; - // cublasHandle_t handle; - // cusolverDnHandle_t handleSol; - Option solver; - // state and measurement vector dimensions, respectively - int dim_x, dim_z; - // all the workspace related pointers - int Lwork; - T *workspace_lu = 0, *placeHolder0 = 0, *R_cpy = 0; - T *placeHolder1 = 0, *placeHolder2 = 0, *K = 0; //kalman_gain - int *piv, *info; -}; - -}; // end namespace linear - -namespace unscented { - -/** implicit/explicit kalman gain calculation */ -enum Inverse { Explicit, Implicit }; - -}; // end namespace unscented -}; // end namespace kf diff --git a/cpp/src/kalman_filter/lkf.h b/cpp/src/kalman_filter/lkf.h deleted file mode 100644 index a289dd083a..0000000000 --- a/cpp/src/kalman_filter/lkf.h +++ /dev/null @@ -1,362 +0,0 @@ -/* - * Copyright (c) 2018, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once -#include -#include -#include "cuda_utils.h" -#include "kf_variables.h" -#include "linalg/cublas_wrappers.h" -#include "linalg/cusolver_wrappers.h" -#include "utils.h" - -namespace kf { -namespace linear { - -using namespace MLCommon; -using namespace MLCommon::LinAlg; - -// initialize this structure with all relevant pointers -// during first call, pass workspace as a nullptr to evaluate the workspace -// size needed in B, then during the second call, pass the rightfully -// allocated workspace buffer as input -template -void set(Variables &var, int _dim_x, int _dim_z, Option _solver, T *_x_est, - T *_x_up, T *_Phi, T *_P_est, T *_P_up, T *_Q, T *_R, T *_H, - void *workspace, size_t &workspaceSize, - cusolverDnHandle_t handle_sol) { - var.solver = _solver; - var.dim_x = _dim_x; - var.dim_z = _dim_z; - CUSOLVER_CHECK(cusolverDngetrf_bufferSize(handle_sol, var.dim_z, var.dim_z, - var.R_cpy, var.dim_z, &var.Lwork)); - workspaceSize = 0; - const size_t granularity = 256; - var.R_cpy = (T *)workspaceSize; - workspaceSize += alignTo(var.dim_z * var.dim_z * sizeof(T), granularity); - var.K = (T *)workspaceSize; - workspaceSize += alignTo(var.dim_x * var.dim_z * sizeof(T), granularity); - var.piv = (int *)workspaceSize; - workspaceSize += alignTo(var.dim_z * sizeof(int), granularity); - var.placeHolder1 = (T *)workspaceSize; - workspaceSize += alignTo(var.dim_z * var.dim_x * sizeof(T), granularity); - var.workspace_lu = (T *)workspaceSize; - workspaceSize += alignTo(var.Lwork * sizeof(T), granularity); - var.info = (int *)workspaceSize; - workspaceSize += alignTo(sizeof(int), granularity); - // only need when we need to calculate kalman gain - if (var.solver < 2) { - var.placeHolder2 = (T *)workspaceSize; - workspaceSize += alignTo(var.dim_z * var.dim_z * sizeof(T), granularity); - var.placeHolder0 = (T *)workspaceSize; - workspaceSize += 
alignTo(var.dim_z * var.dim_z * sizeof(T), granularity); - } - - if (workspace) { - ASSERT(!var.initialized, "kf::linear::set: already initialized!"); - var.x_est = _x_est; - var.x_up = _x_up; - var.Phi = _Phi; - var.P_est = _P_est; - var.P_up = _P_up; - var.Q = _Q; - var.R = _R; - var.H = _H; - // initialize all the workspace pointers - var.R_cpy = (T *)((size_t)var.R_cpy + (size_t)workspace); - var.K = (T *)((size_t)var.K + (size_t)workspace); - var.piv = (int *)((size_t)var.piv + (size_t)workspace); - var.placeHolder1 = (T *)((size_t)var.placeHolder1 + (size_t)workspace); - var.workspace_lu = (T *)((size_t)var.workspace_lu + (size_t)workspace); - var.info = (int *)((size_t)var.info + (size_t)workspace); - if (var.solver < 2) { - var.placeHolder2 = (T *)((size_t)var.placeHolder2 + (size_t)workspace); - var.placeHolder0 = (T *)((size_t)var.placeHolder0 + (size_t)workspace); - } - if (var.solver < ShortFormImplicit) - make_ID_matrix(var.placeHolder0, var.dim_z); - var.initialized = true; - } -} - -template -void predict_x(Variables &var, cublasHandle_t handle, cudaStream_t stream) { - T alfa = (T)1.0, beta = (T)0.0; - CUBLAS_CHECK(cublasgemv(handle, CUBLAS_OP_N, var.dim_x, var.dim_x, &alfa, - var.Phi, var.dim_x, var.x_up, 1, &beta, var.x_est, 1, - stream)); -} - -template -void predict_P(Variables &var, cublasHandle_t handle, cudaStream_t stream) { - T alfa = (T)1.0, beta = (T)0.0; - CUBLAS_CHECK(cublasgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, var.dim_x, - var.dim_x, var.dim_x, &alfa, var.Phi, var.dim_x, - var.P_up, var.dim_x, &beta, var.P_est, var.dim_x, - stream)); - beta = (T)1.0; - CUBLAS_CHECK(cublasgemm(handle, CUBLAS_OP_N, CUBLAS_OP_T, var.dim_x, - var.dim_x, var.dim_x, &alfa, var.P_est, var.dim_x, - var.Phi, var.dim_x, &beta, var.Q, var.dim_x, stream)); - // This is for making the matrix symmetric - alfa = beta = (T)0.5; - CUBLAS_CHECK(cublasgeam(handle, CUBLAS_OP_N, CUBLAS_OP_T, var.dim_x, - var.dim_x, &alfa, var.Q, var.dim_x, &beta, var.Q, - var.dim_x, 
var.P_est, var.dim_x, stream)); -} - -template -void update_x(Variables &var, cublasHandle_t handle, - cusolverDnHandle_t handle_sol, cudaStream_t stream) { - T alfa = (T)-1.0, beta = (T)1.0; // z - H * x is stored in z - CUBLAS_CHECK(cublasgemv(handle, CUBLAS_OP_N, var.dim_z, var.dim_x, &alfa, - var.H, var.dim_z, var.x_est, 1, &beta, var.z, 1, - stream)); - if (var.solver < ShortFormImplicit) { // explicit KG - alfa = 1.0; - beta = 1.0; - CUBLAS_CHECK(cublasgemv(handle, CUBLAS_OP_N, var.dim_x, var.dim_z, &alfa, - var.K, var.dim_x, var.z, 1, &beta, var.x_est, 1, - stream)); - } else { // implicit Kalman Gain - // finding Y = [inv(B)*(z - H*x)] and placing the result in z - CUSOLVER_CHECK(cusolverDngetrs( - handle_sol, CUBLAS_OP_N, var.dim_z, 1, (const T *)var.R_cpy, var.dim_z, - (const int *)var.piv, var.z, var.dim_z, var.info, stream)); - int info_h; - updateHost(&info_h, var.info, 1, stream); - CUDA_CHECK(cudaStreamSynchronize(stream)); - ASSERT(info_h == 0, - "kf::linear: implicit kalman gain" - " {Y = [inv(B)*(z - H*x)]}, info returned val=%d", - info_h); - // finding x_est + A * w and placing in x_est - alfa = beta = (T)1.0; - CUBLAS_CHECK(cublasgemv(handle, CUBLAS_OP_N, var.dim_x, var.dim_z, &alfa, - var.K, var.dim_x, var.z, 1, &beta, var.x_est, 1, - stream)); - } - // DUE TO GPU POINTER THINGGY, NEED TO COPY DATA AT X_update instead of - // just swapping the pointers. 
- CUDA_CHECK(cudaMemcpy(var.x_up, var.x_est, var.dim_x * sizeof(T), - cudaMemcpyDeviceToDevice)); -} - -template -void update_P(Variables &var, cublasHandle_t handle, - cusolverDnHandle_t handle_sol, cudaStream_t stream) { - if (var.solver == LongForm) { - T alfa = (T)1.0, beta = (T)0.0; - CUBLAS_CHECK(cublasgemm( - handle, CUBLAS_OP_N, CUBLAS_OP_N, var.dim_x, var.dim_x, var.dim_z, &alfa, - var.K, var.dim_x, var.H, var.dim_z, &beta, var.P_up, var.dim_x, stream)); - alfa = (T)-1.0; - beta = (T)1.0; - CUBLAS_CHECK(cublasgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, var.dim_x, - var.dim_x, var.dim_x, &alfa, var.P_up, var.dim_x, - var.P_est, var.dim_x, &beta, var.P_est, var.dim_x, - stream)); - alfa = (T)1.0; - beta = (T)0.0; - CUBLAS_CHECK(cublasgemm(handle, CUBLAS_OP_N, CUBLAS_OP_T, var.dim_z, - var.dim_x, var.dim_z, &alfa, var.R, var.dim_z, - var.K, var.dim_x, &beta, var.placeHolder1, - var.dim_z, stream)); - alfa = (T)-1.0; - beta = (T)1.0; - CUBLAS_CHECK(cublasgemm(handle, CUBLAS_OP_N, CUBLAS_OP_T, var.dim_x, - var.dim_x, var.dim_x, &alfa, var.P_est, var.dim_x, - var.P_up, var.dim_x, &beta, var.P_est, var.dim_x, - stream)); - alfa = (T)1.0; - CUBLAS_CHECK(cublasgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, var.dim_x, - var.dim_x, var.dim_z, &alfa, var.K, var.dim_x, - var.placeHolder1, var.dim_z, &beta, var.P_est, - var.dim_x, stream)); - // making the error cov symmetric - alfa = beta = (T)0.5; - CUBLAS_CHECK(cublasgeam(handle, CUBLAS_OP_N, CUBLAS_OP_T, var.dim_x, - var.dim_x, &alfa, var.P_est, var.dim_x, &beta, - var.P_est, var.dim_x, var.P_up, var.dim_x, stream)); - } else if (var.solver == ShortFormExplicit) { - T alfa = (T)1.0, beta = (T)0.0; - CUBLAS_CHECK(cublasgemm( - handle, CUBLAS_OP_N, CUBLAS_OP_N, var.dim_x, var.dim_x, var.dim_z, &alfa, - var.K, var.dim_x, var.H, var.dim_z, &beta, var.P_up, var.dim_x, stream)); - alfa = (T)-1.0; - beta = (T)1.0; - CUBLAS_CHECK(cublasgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, var.dim_x, - var.dim_x, var.dim_x, &alfa, var.P_up, var.dim_x, 
- var.P_est, var.dim_x, &beta, var.P_est, var.dim_x, - stream)); - alfa = beta = (T)0.5; - CUBLAS_CHECK(cublasgeam(handle, CUBLAS_OP_T, CUBLAS_OP_N, var.dim_x, - var.dim_x, &alfa, var.P_est, var.dim_x, &beta, - var.P_est, var.dim_x, var.P_up, var.dim_x, stream)); - } else { - CUDA_CHECK(cudaMemcpy(var.placeHolder1, var.H, - var.dim_z * var.dim_x * sizeof(T), - cudaMemcpyDeviceToDevice)); - // finding [inv(B)*H] and placing the result in var.placeHolder1_d - CUSOLVER_CHECK(cusolverDngetrs(handle_sol, CUBLAS_OP_N, var.dim_z, - var.dim_x, (const T *)var.R_cpy, var.dim_z, - (const int *)var.piv, var.placeHolder1, - var.dim_z, var.info, stream)); - int info_h; - updateHost(&info_h, var.info, 1, stream); - CUDA_CHECK(cudaStreamSynchronize(stream)); - ASSERT(info_h == 0, - "kf::linear: implicit kalman gain with short form, finding " - "[inv(B)*H] info returned val=%d", - info_h); - T alfa = (T)1.0, beta = (T)0.0; - CUBLAS_CHECK(cublasgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, var.dim_x, - var.dim_x, var.dim_z, &alfa, var.K, var.dim_x, - var.placeHolder1, var.dim_z, &beta, var.K, - var.dim_x, stream)); - alfa = (T)-1.0; - beta = (T)1.0; - CUBLAS_CHECK(cublasgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, var.dim_x, - var.dim_x, var.dim_x, &alfa, var.K, var.dim_x, - var.P_est, var.dim_x, &beta, var.P_est, var.dim_x, - stream)); - alfa = beta = (T)0.5; - CUBLAS_CHECK(cublasgeam(handle, CUBLAS_OP_N, CUBLAS_OP_T, var.dim_x, - var.dim_x, &alfa, var.P_est, var.dim_x, &beta, - var.P_est, var.dim_x, var.P_up, var.dim_x, stream)); - } -} - -template -void find_kalman_gain(Variables &var, cublasHandle_t handle, - cusolverDnHandle_t handle_sol, cudaStream_t stream) { - CUDA_CHECK(cudaMemcpy(var.R_cpy, var.R, var.dim_z * var.dim_z * sizeof(T), - cudaMemcpyDeviceToDevice)); - T alfa = (T)1.0, beta = (T)0.0; - CUBLAS_CHECK(cublasgemm(handle, CUBLAS_OP_N, CUBLAS_OP_T, var.dim_x, - var.dim_z, var.dim_x, &alfa, var.P_est, var.dim_x, - var.H, var.dim_z, &beta, var.K, var.dim_x, stream)); - alfa = beta = 
(T)1.0; - CUBLAS_CHECK(cublasgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, var.dim_z, - var.dim_z, var.dim_x, &alfa, var.H, var.dim_z, var.K, - var.dim_x, &beta, var.R_cpy, var.dim_z, stream)); - CUSOLVER_CHECK(cusolverDngetrf(handle_sol, var.dim_z, var.dim_z, var.R_cpy, - var.dim_z, var.workspace_lu, var.piv, var.info, - stream)); - int info_h; - updateHost(&info_h, var.info, 1, stream); - CUDA_CHECK(cudaStreamSynchronize(stream)); - ASSERT(info_h == 0, "kf::linear: LU decomp, info returned val=%d", info_h); - if (var.solver < ShortFormImplicit) { - // copying ID matrix - CUDA_CHECK(cudaMemcpy(var.placeHolder2, var.placeHolder0, - var.dim_z * var.dim_z * sizeof(T), - cudaMemcpyDeviceToDevice)); - CUSOLVER_CHECK(cusolverDngetrs(handle_sol, CUBLAS_OP_N, var.dim_z, - var.dim_z, (const T *)var.R_cpy, var.dim_z, - (const int *)var.piv, var.placeHolder0, - var.dim_z, var.info, stream)); - int info_h; - updateHost(&info_h, var.info, 1, stream); - CUDA_CHECK(cudaStreamSynchronize(stream)); - ASSERT(info_h == 0, - "kf::linear: Explicit var.kalman gain, inverse " - "returned val=%d", - info_h); - // var.R_cpy contains junk, R contains the real R value. - alfa = (T)1.0; - beta = (T)0.0; - CUBLAS_CHECK(cublasgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, var.dim_x, - var.dim_z, var.dim_z, &alfa, var.K, var.dim_x, - var.placeHolder0, var.dim_z, &beta, var.K, - var.dim_x, stream)); - T *temp = var.placeHolder0; - var.placeHolder0 = var.placeHolder2; - var.placeHolder2 = temp; - // junk in var.R_cpy and var.placeHolder2. and var.K contains the Kalman gain. 
- } -} - -/** - * @brief Initialization method for the opaque data structure - * @tparam T the data type for computation - * @param var the opaque structure storing all the required state for KF - * @param _dim_x state vector dimension - * @param _dim_z measurement vector dimension - * @param _solver solver type - * @param _x_est estimated state - * @param _x_up updated state - * @param _Phi state transition matrix - * @param _P_est estimated error covariance - * @param _P_up updated error covariance - * @param _Q process noise covariance matrix - * @param _R measurent noise covariance matrix - * @param _H state to measurement tranformation matrix - * @param workspace workspace buffer. Pass nullptr to compute its size - * @param workspaceSize workspace buffer size in B. - * @param handle_sol cusolver handle - * @note this must always be called first before calling predict/update - */ -template -void init(Variables &var, int _dim_x, int _dim_z, Option _solver, T *_x_est, - T *_x_up, T *_Phi, T *_P_est, T *_P_up, T *_Q, T *_R, T *_H, - void *workspace, size_t &workspaceSize, - cusolverDnHandle_t handle_sol) { - set(var, _dim_x, _dim_z, _solver, _x_est, _x_up, _Phi, _P_est, _P_up, _Q, _R, - _H, workspace, workspaceSize, handle_sol); -} - -/** - * @brief Predict the state for the next step, before the measurements are taken - * @tparam T the data type for computation - * @param var the opaque structure storing all the required state for KF - * @param handle cublas handle - * @param stream cuda stream - * @note it is assumed that the 'init' function call has already been made with - * a legal workspace buffer! Also, calling the 'predict' and 'update' functions - * out-of-order will lead to unknown state! 
- */ -template -void predict(Variables &var, cublasHandle_t handle, cudaStream_t stream) { - ASSERT(var.initialized, "kf::linear::predict: 'init' not called!"); - predict_x(var, handle, stream); - predict_P(var, handle, stream); -} - -/** - * @brief Update the state in-lieu of measurements - * @tparam T the data type for computation - * @param var the opaque structure storing all the required state for KF - * @param _z the measurement vectorw - * @param handle cublas handle - * @param handle_sol cusolver handle - * @param stream cuda stream - * @note it is assumed that the 'init' function call has already been made with - * a legal workspace buffer! Also, calling the 'predict' and 'update' functions - * out-of-order will lead to unknown state! - */ -template -void update(Variables &var, T *_z, cublasHandle_t handle, - cusolverDnHandle_t handle_sol, cudaStream_t stream) { - ASSERT(var.initialized, "kf::linear::update: 'init' not called!"); - var.z = _z; - find_kalman_gain(var, handle, handle_sol, stream); - update_x(var, handle, handle_sol, stream); - update_P(var, handle, handle_sol, stream); -} - -}; // end namespace linear -}; // end namespace kf diff --git a/cpp/src/kalman_filter/lkf_py.cu b/cpp/src/kalman_filter/lkf_py.cu deleted file mode 100644 index fda5b03877..0000000000 --- a/cpp/src/kalman_filter/lkf_py.cu +++ /dev/null @@ -1,134 +0,0 @@ -/* - * Copyright (c) 2018, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "linalg/cublas_wrappers.h" -#include "linalg/cusolver_wrappers.h" -#include "lkf.h" -#include "lkf_py.h" - -namespace kf { -namespace linear { - -size_t get_workspace_size_f32(Variables &var, int dim_x, int dim_z, - Option solver, float *x_est, float *x_up, - float *Phi, float *P_est, float *P_up, float *Q, - float *R, float *H) { - size_t workspaceSize; - cusolverDnHandle_t cusolver_handle = NULL; - CUSOLVER_CHECK(cusolverDnCreate(&cusolver_handle)); - - init(var, dim_x, dim_z, solver, x_est, x_up, Phi, P_est, P_up, Q, R, H, - (void *)nullptr, workspaceSize, cusolver_handle); - CUSOLVER_CHECK(cusolverDnDestroy(cusolver_handle)); - return workspaceSize; -} - -void init_f32(Variables &var, int dim_x, int dim_z, Option solver, - float *x_est, float *x_up, float *Phi, float *P_est, float *P_up, - float *Q, float *R, float *H, void *workspace, - size_t &workspaceSize) { - cusolverDnHandle_t cusolver_handle = NULL; - CUSOLVER_CHECK(cusolverDnCreate(&cusolver_handle)); - - // CUDA_CHECK(cudaMalloc((void **)&workspace, workspaceSize)); - init(var, dim_x, dim_z, solver, x_est, x_up, Phi, P_est, P_up, Q, R, H, - workspace, workspaceSize, cusolver_handle); - CUSOLVER_CHECK(cusolverDnDestroy(cusolver_handle)); -} -void predict_f32(Variables &var) { - cublasHandle_t cublas_handle; - CUBLAS_CHECK(cublasCreate(&cublas_handle)); - - cudaStream_t stream; - CUDA_CHECK(cudaStreamCreate(&stream)); - - predict(var, cublas_handle, stream); - CUBLAS_CHECK(cublasDestroy(cublas_handle)); - CUDA_CHECK(cudaStreamDestroy(stream)); -} -void update_f32(Variables &var, float *_z) { - cublasHandle_t cublas_handle; - CUBLAS_CHECK(cublasCreate(&cublas_handle)); - - cusolverDnHandle_t cusolver_handle = NULL; - CUSOLVER_CHECK(cusolverDnCreate(&cusolver_handle)); - - cudaStream_t stream; - CUDA_CHECK(cudaStreamCreate(&stream)); - - update(var, _z, cublas_handle, cusolver_handle, stream); - CUBLAS_CHECK(cublasDestroy(cublas_handle)); - 
CUSOLVER_CHECK(cusolverDnDestroy(cusolver_handle)); - CUDA_CHECK(cudaStreamDestroy(stream)); -} - -// Double precision functions - -size_t get_workspace_size_f64(Variables &var, int dim_x, int dim_z, - Option solver, double *x_est, double *x_up, - double *Phi, double *P_est, double *P_up, - double *Q, double *R, double *H) { - size_t workspaceSize; - cusolverDnHandle_t cusolver_handle = NULL; - CUSOLVER_CHECK(cusolverDnCreate(&cusolver_handle)); - - init(var, dim_x, dim_z, solver, x_est, x_up, Phi, P_est, P_up, Q, R, - H, (void *)nullptr, workspaceSize, cusolver_handle); - CUSOLVER_CHECK(cusolverDnDestroy(cusolver_handle)); - return workspaceSize; -} - -void init_f64(Variables &var, int dim_x, int dim_z, Option solver, - double *x_est, double *x_up, double *Phi, double *P_est, - double *P_up, double *Q, double *R, double *H, void *workspace, - size_t &workspaceSize) { - cusolverDnHandle_t cusolver_handle = NULL; - CUSOLVER_CHECK(cusolverDnCreate(&cusolver_handle)); - - // CUDA_CHECK(cudaMalloc((void **)&workspace, workspaceSize)); - init(var, dim_x, dim_z, solver, x_est, x_up, Phi, P_est, P_up, Q, R, H, - workspace, workspaceSize, cusolver_handle); - CUSOLVER_CHECK(cusolverDnDestroy(cusolver_handle)); -} -void predict_f64(Variables &var) { - cublasHandle_t cublas_handle; - CUBLAS_CHECK(cublasCreate(&cublas_handle)); - - cudaStream_t stream; - CUDA_CHECK(cudaStreamCreate(&stream)); - - predict(var, cublas_handle, stream); - CUBLAS_CHECK(cublasDestroy(cublas_handle)); - CUDA_CHECK(cudaStreamDestroy(stream)); -} -void update_f64(Variables &var, double *_z) { - cublasHandle_t cublas_handle; - CUBLAS_CHECK(cublasCreate(&cublas_handle)); - - cusolverDnHandle_t cusolver_handle = NULL; - CUSOLVER_CHECK(cusolverDnCreate(&cusolver_handle)); - - cudaStream_t stream; - CUDA_CHECK(cudaStreamCreate(&stream)); - - update(var, _z, cublas_handle, cusolver_handle, stream); - CUBLAS_CHECK(cublasDestroy(cublas_handle)); - CUSOLVER_CHECK(cusolverDnDestroy(cusolver_handle)); - 
CUDA_CHECK(cudaStreamDestroy(stream)); -} - -}; // end namespace linear -}; // end namespace kf diff --git a/cpp/src/kalman_filter/lkf_py.h b/cpp/src/kalman_filter/lkf_py.h deleted file mode 100644 index acdd85dff3..0000000000 --- a/cpp/src/kalman_filter/lkf_py.h +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright (c) 2018, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -namespace kf { -namespace linear { - -size_t get_workspace_size_f32(Variables &var, int _dim_x, int _dim_z, - Option _solver, float *_x_est, float *_x_up, - float *_Phi, float *_P_est, float *_P_up, - float *_Q, float *_R, float *_H); -void init_f32(Variables &var, int _dim_x, int _dim_z, Option _solver, - float *_x_est, float *_x_up, float *_Phi, float *_P_est, - float *_P_up, float *_Q, float *_R, float *_H, void *workspace, - size_t &workspaceSize); -void predict_f32(Variables &var); -void update_f32(Variables &var, float *_z); - -size_t get_workspace_size_f64(Variables &var, int _dim_x, int _dim_z, - Option _solver, double *_x_est, double *_x_up, - double *_Phi, double *_P_est, double *_P_up, - double *_Q, double *_R, double *_H); -void init_f64(Variables &var, int _dim_x, int _dim_z, Option _solver, - double *_x_est, double *_x_up, double *_Phi, double *_P_est, - double *_P_up, double *_Q, double *_R, double *_H, - void *workspace, size_t &workspaceSize); -void predict_f64(Variables &var); -void update_f64(Variables &var, double *_z); - -}; // 
end namespace linear -}; // end namespace kf diff --git a/cpp/src/kalman_filter/utils.h b/cpp/src/kalman_filter/utils.h deleted file mode 100644 index ad0c2e1ae9..0000000000 --- a/cpp/src/kalman_filter/utils.h +++ /dev/null @@ -1,120 +0,0 @@ -/* - * Copyright (c) 2018, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once -#include -#include -#include "cuda_utils.h" -#define IDX2C(i, j, ld) (j * ld + i) - -namespace kf { - -using namespace MLCommon; -// helper kernels -// (lkf) -template -__global__ void Linear_KF_ID_kernel(T *w, int dim) { - int j = threadIdx.x + blockDim.x * blockIdx.x; - int i = threadIdx.y + blockDim.y * blockIdx.y; - if (i < dim && j < dim) { - if (i == j) - w[IDX2C(i, j, dim)] = 1.0; - else - w[IDX2C(i, j, dim)] = 0.0; - } -} -// (enkf) -template -__global__ void vctwiseAccumulate_kernel(const int nPoints, const int dim, - const T scalar, const T *X, T *x) { - int idx = threadIdx.x + blockDim.x * blockIdx.x; - int col = idx % dim; - int row = idx / dim; - if (col < dim && row < nPoints) myAtomicAdd(x + col, scalar * X[idx]); -} - -template -__global__ void En_KF_normalize(const int divider, const int n, T *x) { - int xi = threadIdx.x + blockDim.x * blockIdx.x; - if (xi < n) x[xi] = x[xi] / divider; -} - -template -__global__ void vctwiseAdd_kernel(const int col, const int row, T scalar, - const T *in_m, const T *v, T *out_m) { - int m_i = threadIdx.x + blockDim.x * blockIdx.x; - int v_i = m_i % row; 
- if (m_i < row * col) out_m[m_i] = in_m[m_i] + scalar * v[v_i]; -} - -/** - * @brief identity matrix generating function - * @tparam T the data type - * @param I the out matrix - * @param dim dimension of I - */ -template -void make_ID_matrix(T *I, int dim) { - dim3 block(32, 32); - dim3 grid(ceildiv(dim, (int)block.x), ceildiv(dim, (int)block.y)); - Linear_KF_ID_kernel<<>>(I, dim); - CUDA_CHECK(cudaPeekAtLastError()); -} - -/** - * @brief scales column vectors of X to accumulate into x - * @tparam T the data type - * @param X the input matrix - * @param x the output vector - * @param nPoints the number of columns in X - * @param dim the dimension of x - * @param scalar scaling factor applied to col of X - * which will get accumulated into x - * @note - * All the matrices are assumed to be stored in col major form - * x = x + scalar * (X[0th col] + ... + X[nPoint'th col]) - */ -template -void vctwiseAccumulate(const int nPoints, const int dim, const T scalar, - const T *X, T *x) { - dim3 block(64); - dim3 grid(ceildiv(nPoints * dim, (int)block.x)); - vctwiseAccumulate_kernel<<>>(nPoints, dim, scalar, X, x); - CUDA_CHECK(cudaPeekAtLastError()); -} - -/** - * @brief add scaled vector to a matrix - * @tparam T the data type - * @param col col number of col - * @param row number of row - * @param scalar scalar to scale vector with - * @param in_m matrix in which the vector is supposed to be added - * @param v vector to be scaled and added - * @param out_m final added matrix generated by ver dan merwe method - * @note All the matrices are assumed to be stored in col major form - * vectors are added as if adding a column vector - */ -template -void vctwiseAdd(const int col, const int row, const T scalar, const T *in_m, - const T *v, T *out_m) { - dim3 block(64); - dim3 grid(ceildiv(row * col, (int)block.x)); - vctwiseAdd_kernel<<>>(col, row, scalar, in_m, v, out_m); - CUDA_CHECK(cudaPeekAtLastError()); -} - -}; // end of namespace kf diff --git a/cpp/test/CMakeLists.txt 
b/cpp/test/CMakeLists.txt index 49234b0f95..58862bdbac 100644 --- a/cpp/test/CMakeLists.txt +++ b/cpp/test/CMakeLists.txt @@ -42,7 +42,6 @@ if(BUILD_CUML_TESTS) sg/holtwinters_test.cu sg/kmeans_test.cu sg/knn_test.cu - sg/lkf_test.cu sg/ols.cu sg/pca_test.cu sg/quasi_newton.cu diff --git a/cpp/test/sg/lkf_test.cu b/cpp/test/sg/lkf_test.cu deleted file mode 100644 index 579484b5a7..0000000000 --- a/cpp/test/sg/lkf_test.cu +++ /dev/null @@ -1,213 +0,0 @@ -/* - * Copyright (c) 2019, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include -#include -#include -#include "kalman_filter/KalmanFilter.cuh" -#include "linalg/cublas_wrappers.h" -#include "linalg/cusolver_wrappers.h" - -namespace kf { -namespace linear { - -using namespace MLCommon; - -template -struct LKFInputs { - T tolerance; - int dim_x, dim_z, iterations; - Option option; - unsigned long long int seed; - // 0 = Long, 1 = Shrt_Exp, 2 = Shrt_Imp (option) -}; - -template -::std::ostream &operator<<(::std::ostream &os, const LKFInputs &dims) { - return os; -} - -template -class LKFTest : public ::testing::TestWithParam> { - protected: // functionsv - void SetUp() override { - // getting params - params = ::testing::TestWithParam>::GetParam(); - dim_x = params.dim_x; - dim_z = params.dim_z; - iterations = params.iterations; - option = (Option)params.option; - tolerance = params.tolerance; - - // cpu mallocs - Phi = (T *)malloc(dim_x * dim_x * sizeof(T)); - x_up = (T *)malloc(dim_x * 1 * sizeof(T)); - x_est = (T *)malloc(dim_x * 1 * sizeof(T)); - P_up = (T *)malloc(dim_x * dim_x * sizeof(T)); - Q = (T *)malloc(dim_x * dim_x * sizeof(T)); - H = (T *)malloc(dim_z * dim_x * sizeof(T)); - R = (T *)malloc(dim_z * dim_z * sizeof(T)); - z = (T *)malloc(dim_z * 1 * sizeof(T)); - - cublasHandle_t cublas_handle; - cusolverDnHandle_t cusolver_handle = NULL; - cudaStream_t stream; - - CUBLAS_CHECK(cublasCreate(&cublas_handle)); - CUSOLVER_CHECK(cusolverDnCreate(&cusolver_handle)); - CUDA_CHECK(cudaStreamCreate(&stream)); - - // making sane model - x_up[0] = 0.0; - x_up[1] = 1.0; - Phi[0] = 1.0; - Phi[1] = 0.0; - Phi[2] = 1.0; - Phi[3] = 1.0; - P_up[0] = 100.0; - P_up[1] = 0.0; - P_up[2] = 0.0; - P_up[3] = 100.0; - R[0] = 100.0; - T var = 0.001; - Q[0] = 0.25 * var; - Q[1] = 0.5 * var; - Q[2] = 0.5 * var; - Q[3] = 1.1 * var; - H[0] = 1.0; - H[1] = 0.0; - - // gpu mallocs - CUDA_CHECK(cudaMalloc((void **)&x_est_d, dim_x * sizeof(T))); - CUDA_CHECK(cudaMalloc((void **)&x_up_d, dim_x * sizeof(T))); - 
CUDA_CHECK(cudaMalloc((void **)&Phi_d, dim_x * dim_x * sizeof(T))); - CUDA_CHECK(cudaMalloc((void **)&P_est_d, dim_x * dim_x * sizeof(T))); - CUDA_CHECK(cudaMalloc((void **)&P_up_d, dim_x * dim_x * sizeof(T))); - CUDA_CHECK(cudaMalloc((void **)&Q_d, dim_x * dim_x * sizeof(T))); - CUDA_CHECK(cudaMalloc((void **)&R_d, dim_z * dim_z * sizeof(T))); - CUDA_CHECK(cudaMalloc((void **)&H_d, dim_z * dim_x * sizeof(T))); - CUDA_CHECK(cudaMalloc((void **)&z_d, dim_z * sizeof(T))); - - // copy data to gpu (available in ml-common/cuda_utils.h) - updateDevice(Phi_d, Phi, dim_x * dim_x, stream); - updateDevice(x_up_d, x_up, dim_x, stream); - updateDevice(P_up_d, P_up, dim_x * dim_x, stream); - updateDevice(Q_d, Q, dim_x * dim_x, stream); - updateDevice(R_d, R, dim_z * dim_z, stream); - updateDevice(H_d, H, dim_z * dim_x, stream); - - // kf initialization - Variables vars; - size_t workspaceSize; - init(vars, dim_x, dim_z, option, x_est_d, x_up_d, Phi_d, P_est_d, P_up_d, - Q_d, R_d, H_d, nullptr, workspaceSize, cusolver_handle); - CUDA_CHECK(cudaMalloc((void **)&workspace_l, workspaceSize)); - init(vars, dim_x, dim_z, option, x_est_d, x_up_d, Phi_d, P_est_d, P_up_d, - Q_d, R_d, H_d, workspace_l, workspaceSize, cusolver_handle); - - // for random noise - std::default_random_engine generator(params.seed); - std::normal_distribution distribution(0.0, 1.0); - rmse_x = 0.0; - rmse_v = 0.0; - - for (int q = 0; q < iterations; q++) { - predict(vars, cublas_handle, stream); - // generating measurement - z[0] = q + distribution(generator); - updateDevice(z_d, z, dim_z, stream); - - update(vars, z_d, cublas_handle, cusolver_handle, stream); - // getting update - updateHost(x_up, x_up_d, dim_x, stream); - CUDA_CHECK(cudaStreamSynchronize(stream)); - - // summing squared ratios - rmse_v += pow(x_up[1] - 1, 2); // true velo is alwsy 1 - rmse_x += pow(x_up[0] - q, 2); - } - rmse_x /= iterations; - rmse_v /= iterations; - rmse_x = pow(rmse_x, 0.5); - rmse_v = pow(rmse_v, 0.5); - - 
CUBLAS_CHECK(cublasDestroy(cublas_handle)); - CUSOLVER_CHECK(cusolverDnDestroy(cusolver_handle)); - CUDA_CHECK(cudaStreamDestroy(stream)); - } - - void TearDown() override { - // freeing gpu mallocs - CUDA_CHECK(cudaFree(workspace_l)); - CUDA_CHECK(cudaFree(Phi_d)); - CUDA_CHECK(cudaFree(P_up_d)); - CUDA_CHECK(cudaFree(P_est_d)); - CUDA_CHECK(cudaFree(Q_d)); - CUDA_CHECK(cudaFree(R_d)); - CUDA_CHECK(cudaFree(H_d)); - CUDA_CHECK(cudaFree(x_est_d)); - CUDA_CHECK(cudaFree(x_up_d)); - CUDA_CHECK(cudaFree(z_d)); - - // freeing cpu mallocs - free(Phi); - free(x_up); - free(x_est); - free(P_up); - free(Q); - free(H); - free(R); - free(z); - } - - protected: // variables - LKFInputs params; - Option option; - T *Phi, *x_up, *x_est, *P_up, *Q, *H, *R, *z; //cpu pointers - T *x_est_d, *x_up_d, *Phi_d, *P_est_d, *P_up_d, *Q_d, *R_d, *H_d, *z_d, - *workspace_l; //gpu pointers - T rmse_x, rmse_v, tolerance; // root mean squared error - int dim_z, dim_x, iterations; -}; // LKFTest - -// float -const std::vector> inputsf = { - {1.5f, 2, 1, 100, Option::LongForm, 6ULL}, - {1.5f, 2, 1, 100, Option::ShortFormExplicit, 6ULL}, - {1.3f, 2, 1, 100, Option::ShortFormImplicit, 6ULL}}; -typedef LKFTest LKFTestF; -TEST_P(LKFTestF, RMSEUnderToleranceF) { - EXPECT_LT(rmse_x, tolerance) << " position out of tol."; - EXPECT_LT(rmse_v, tolerance) << " velocity out of tol."; -} -INSTANTIATE_TEST_CASE_P(LKFTests, LKFTestF, ::testing::ValuesIn(inputsf)); - -// double -const std::vector> inputsd = { - {1.5, 2, 1, 100, Option::LongForm, 6ULL}, - {1.5, 2, 1, 100, Option::ShortFormExplicit, 6ULL}, - {1.2, 2, 1, 100, Option::ShortFormImplicit, 6ULL}}; -typedef LKFTest LKFTestD; -TEST_P(LKFTestD, RMSEUnderToleranceD) { - EXPECT_LT(rmse_x, tolerance) << " position out of tol."; - EXPECT_LT(rmse_v, tolerance) << " velocity out of tol."; -} -INSTANTIATE_TEST_CASE_P(LKFTests, LKFTestD, ::testing::ValuesIn(inputsd)); - -}; // end namespace linear -}; // end namespace kf diff --git a/python/cuml/__init__.py 
b/python/cuml/__init__.py index dafe86022c..87fa779674 100644 --- a/python/cuml/__init__.py +++ b/python/cuml/__init__.py @@ -34,7 +34,6 @@ from cuml.ensemble.randomforestregressor import RandomForestRegressor from cuml.fil import fil -from cuml.filter.kalman_filter import KalmanFilter from cuml.linear_model.elastic_net import ElasticNet from cuml.linear_model.lasso import Lasso diff --git a/python/cuml/filter/__init__.py b/python/cuml/filter/__init__.py deleted file mode 100644 index 78030e44b0..0000000000 --- a/python/cuml/filter/__init__.py +++ /dev/null @@ -1,17 +0,0 @@ -# -# Copyright (c) 2019, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -from cuml.filter.kalman_filter import KalmanFilter diff --git a/python/cuml/filter/kalman_filter.pyx b/python/cuml/filter/kalman_filter.pyx deleted file mode 100644 index b7f406a3d9..0000000000 --- a/python/cuml/filter/kalman_filter.pyx +++ /dev/null @@ -1,513 +0,0 @@ -# -# Copyright (c) 2019, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# - -# cython: profile=False -# distutils: language = c++ -# cython: embedsignature = True -# cython: language_level = 3 - -import cudf -import numpy as np - -from numba import cuda -from cuml.utils import numba_utils -import rmm - -from libc.stdint cimport uintptr_t -from libc.stdlib cimport calloc, malloc, free - -from cuml.common.base import Base -from cuml.utils import zeros -import warnings - -cdef extern from "kalman_filter/kf_variables.h" namespace "kf::linear": - enum Option: - LongForm = 0 - ShortFormExplicit - ShortFormImplicit - - cdef cppclass Variables[T]: - pass - - -cdef extern from "kalman_filter/lkf_py.h" namespace "kf::linear" nogil: - - cdef size_t get_workspace_size_f32(Variables[float]&, - int, - int, - Option, - float*, - float*, - float*, - float*, - float*, - float*, - float*, - float*) except + - - cdef void init_f32(Variables[float]&, - int, - int, - Option, - float*, - float*, - float*, - float*, - float*, - float*, - float*, - float*, - void*, - size_t&) except + - - cdef void predict_f32(Variables[float]&) except + - - cdef void update_f32(Variables[float]&, - float*) except + - - cdef size_t get_workspace_size_f64(Variables[double]&, - int, - int, - Option, - double*, - double*, - double*, - double*, - double*, - double*, - double*, - double*) except + - - cdef void init_f64(Variables[double]&, - int, - int, - Option, - double*, - double*, - double*, - double*, - double*, - double*, - double*, - double*, - void*, - size_t&) except + - - cdef void predict_f64(Variables[double]&) except + - - cdef void update_f64(Variables[double]&, - double*) except + - - -class KalmanFilter(Base): - """ - Implements a Kalman filter. You are responsible for setting the - various state variables to reasonable values; defaults will - not give you a functional filter. 
- After construction the filter will have default matrices created for you, - but you must specify the values for each. - - .. deprecated:: 0.13 - KalmanFilter is deprecated and will be removed in an upcoming version. - See issue #1754 for details. - - Examples - -------- - - .. code:: - - from cuml import KalmanFilter - f = KalmanFilter(dim_x=2, dim_z=1) - f.x = np.array([[2.], # position - [0.]]) # velocity - f.F = np.array([[1.,1.], [0.,1.]]) - f.H = np.array([[1.,0.]]) - f.P = np.array([[1000., 0.], [ 0., 1000.] ]) - f.R = 5 - - Now just perform the standard predict/update loop: - - .. code:: - - while some_condition_is_true: - z = numba.rmm.to_device(np.array([i]) - f.predict() - f.update(z) - - Parameters - ---------- - dim_x : int - Number of state variables for the Kalman filter. - This is used to set the default size of P, Q, and u - dim_z : int - Number of measurement inputs. - - Attributes - ---------- - x : numba device array, numpy array or cuDF series (dim_x, 1), - Current state estimate. Any call to update() or predict() updates - this variable. - P : numba device array, numpy array or cuDF dataframe(dim_x, dim_x) - Current state covariance matrix. Any call to update() or predict() - updates this variable. - x_prior : numba device array, numpy array or cuDF series(dim_x, 1) - Prior (predicted) state estimate. The *_prior and *_post attributes - are for convenience; they store the prior and posterior of the - current epoch. Read Only. - P_prior : numba device array, numpy array or cuDF dataframe(dim_x, dim_x) - Prior (predicted) state covariance matrix. Read Only. - x_post : numba device array, numpy array or cuDF series(dim_x, 1) - Posterior (updated) state estimate. Read Only. - P_post : numba device array, numpy array or cuDF dataframe(dim_x, dim_x) - Posterior (updated) state covariance matrix. Read Only. - z : numba device array or cuDF series (dim_x, 1) - Last measurement used in update(). Read only. 
- R : numba device array(dim_z, dim_z) - Measurement noise matrix - Q : numba device array(dim_x, dim_x) - Process noise matrix - F : numba device array() - State Transition matrix - H : numba device array(dim_z, dim_x) - Measurement function - y : numba device array - Residual of the update step. Read only. - K : numba device array(dim_x, dim_z) - Kalman gain of the update step. Read only. - precision: 'single' or 'double' - Whether the Kalman Filter uses single or double precision - - """ - - def _get_dtype(self, precision): - return { - 'single': np.float32, - 'double': np.float64, - }[precision] - - def __init__(self, dim_x, dim_z, solver='long', precision='single', - seed=False): - warnings.warn("""KalmanFilter is deprecated and will be removed in an - upcoming release. The current version has known - numerical and performance issues (see issue #1754 and - #1758)""", DeprecationWarning) - - cdef Option algo - - if solver in ['long', 'short_implicit', 'short_explicit']: - self.algorithm = solver - algo = _get_algorithm_c_name(self.algorithm) - else: - msg = "algorithm {!r} is not supported" - raise TypeError(msg.format(solver)) - - self.precision = precision - self.dtype = self._get_dtype(precision) - - Phi = np.ones((dim_x, dim_x), dtype=self.dtype) - x_up = zeros((dim_x, 1), dtype=self.dtype) - x_est = zeros((dim_x, 1), dtype=self.dtype) - P_up = np.eye(dim_x, dtype=self.dtype) - P_est = np.eye(dim_x, dtype=self.dtype) - Q = np.eye(dim_x, dtype=self.dtype) - H = zeros((dim_z, dim_x), dtype=self.dtype) - R = np.eye(dim_z, dtype=self.dtype) - z = np.array([[0]*dim_z], dtype=self.dtype).T - - self.F = rmm.to_device(Phi) - self.x = rmm.to_device(x_up) - self.x_prev = rmm.to_device(x_est) - self.P = rmm.to_device(P_up) - self.P_prev = rmm.to_device(P_est) - self.Q = rmm.to_device(Q) - self.H = rmm.to_device(H) - self.R = rmm.to_device(R) - self.z = rmm.to_device(z) - - self.dim_x = dim_x - self.dim_z = dim_z - - self._workspaceSize = -1 - - cdef uintptr_t _Phi_ptr 
= self.F.device_ctypes_pointer.value - cdef uintptr_t _x_up_ptr = self.x.device_ctypes_pointer.value - cdef uintptr_t _x_est_ptr = self.x_prev.device_ctypes_pointer.value - cdef uintptr_t _P_up_ptr = self.P.device_ctypes_pointer.value - cdef uintptr_t _P_est_ptr = self.P_prev.device_ctypes_pointer.value - cdef uintptr_t _Q_ptr = self.Q.device_ctypes_pointer.value - cdef uintptr_t _H_ptr = self.H.device_ctypes_pointer.value - cdef uintptr_t _R_ptr = self.R.device_ctypes_pointer.value - cdef uintptr_t _z_ptr = self.z.device_ctypes_pointer.value - - cdef Variables[float] var32 - cdef Variables[double] var64 - cdef size_t workspace_size - - cdef int c_dim_x = dim_x - cdef int c_dim_z = dim_z - - with nogil: - - workspace_size = get_workspace_size_f32(var32, - c_dim_x, - c_dim_z, -