ENH: Compatible with NumPy 2.x (#817)

Co-authored-by: hucorz <[email protected]>
Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
xorbitsai · Oct 14, 2024 · 9700db7 · 9700db7
1 parent 5b471fb
commit 9700db7
Show file tree

Hide file tree

Showing 52 changed files with 330 additions and 220 deletions.
diff --git a/.github/workflows/python.yaml b/.github/workflows/python.yaml
@@ -149,7 +149,7 @@ jobs:
  pip install --upgrade --upgrade-strategy only-if-needed --no-cache-dir ".[doc]"
  else
  pip install -e "git+https://github.com/xorbitsai/xoscar.git@main#subdirectory=python&egg=xoscar"
- pip install "numpy<2.0.0" scipy cython pyftpdlib coverage flaky numexpr
+ pip install -U numpy scipy cython pyftpdlib coverage flaky numexpr
 
  if [[ "$MODULE" == "xorbits/pandas" ]]; then
  pip install openpyxl
@@ -360,7 +360,7 @@ jobs:
  export YARN_HOME=$HADOOP_HOME
  export HADOOP_COMMON_LIB_NATIVE_DIR="$HADOOP_HOME/lib/native"
  export PATH="$PATH:$HADOOP_HOME/sbin:$HADOOP_HOME/bin"
- pytest --timeout=1500 -W ignore::PendingDeprecationWarning xorbits/_mars -m hadoop
+ pytest --ignore xorbits/_mars/learn --timeout=1500 -W ignore::PendingDeprecationWarning xorbits/_mars -m hadoop
  elif [[ "$MODULE" == "vineyard" ]]; then
  pytest --timeout=1500 -W ignore::PendingDeprecationWarning \
  --cov-config=setup.cfg --cov-report=xml --cov=xorbits xorbits/_mars/storage/tests/test_libs.py

diff --git a/doc/source/user_guide/storage_backend.rst b/doc/source/user_guide/storage_backend.rst
@@ -40,9 +40,9 @@ create a YAML configuration file named `file.yml` which specify `backends` and `
  
  "@inherits": "@default"
  storage:
- backends: [disk]
- disk:
- root_dirs: "/tmp"
+  backends: [disk]
+  disk:
+  root_dirs: "/tmp"
 
 Start the worker using the :code:`-f file.yml` option:
 
@@ -86,9 +86,9 @@ and start the worker by adding :code:`-f file.yml` option.
  
  "@inherits": "@default"
  storage:
- backends: [disk]
- disk:
- root_dirs: "/mnt/xorbits"
+  backends: [disk]
+  disk:
+  root_dirs: "/mnt/xorbits"
 
 
 GPU

diff --git a/python/setup.cfg b/python/setup.cfg
@@ -24,7 +24,7 @@ include_package_data = True
 packages = find:
 install_requires =
  xoscar>=0.0.8
- numpy>=1.14.0,<2.0.0
+ numpy>=1.14.0
  pandas>=1.0.0
  scipy>=1.0.0; sys_platform!="win32" or python_version>="3.10"
  scipy>=1.0.0,<=1.9.1; sys_platform=="win32" and python_version<"3.10"

diff --git a/python/xorbits/_mars/core/operand/core.py b/python/xorbits/_mars/core/operand/core.py
@@ -19,8 +19,13 @@
 import numpy as np
 from xoscar.metrics import Metrics
 
+from ....utils import is_numpy_2
+
 try:
- from numpy.core._exceptions import UFuncTypeError
+ if is_numpy_2():
+ from numpy._core._exceptions import UFuncTypeError
+ else:
+ from numpy.core._exceptions import UFuncTypeError
 except ImportError: # pragma: no cover
  UFuncTypeError = None
 

diff --git a/python/xorbits/_mars/dataframe/indexing/align.py b/python/xorbits/_mars/dataframe/indexing/align.py
@@ -144,7 +144,7 @@ def _call_dataframe_series(self, lhs: TileableType, rhs: TileableType):
  series_index = rhs.index_value.to_pandas()
  dtypes = lhs.dtypes.reindex(
  lhs.dtypes.index.join(series_index, how=self.join)
- ).fillna(np.dtype(np.float_))
+ ).fillna(np.dtype(np.float64))
  l_shape[1] = r_size = len(dtypes)
  col_val = r_idx_val = parse_index(dtypes.index, store_data=True)
 

diff --git a/python/xorbits/_mars/dataframe/missing/tests/test_missing.py b/python/xorbits/_mars/dataframe/missing/tests/test_missing.py
@@ -241,36 +241,36 @@ def test_isna(setup):
  isna(midx)
 
  # list
- l = [1, 2, 3, np.Inf, np.NaN, pd.NA, pd.NaT]
+ l = [1, 2, 3, np.inf, np.nan, pd.NA, pd.NaT]
  actual = isna(l).execute().fetch()
  expected = pd.isna(l)
  np.testing.assert_array_equal(expected, actual)
 
  # tuple
- t = (1, 2, 3, np.Inf, np.NaN, pd.NA, pd.NaT)
+ t = (1, 2, 3, np.inf, np.nan, pd.NA, pd.NaT)
  assert not isna(t)
 
  # numpy ndarray
- narr = np.array((1, 2, 3, np.Inf, np.NaN))
+ narr = np.array((1, 2, 3, np.inf, np.nan))
  actual = isna(narr).execute().fetch()
  expected = pd.isna(narr)
  np.testing.assert_array_equal(expected, actual)
 
  # pandas index
- pi = pd.Index((1, 2, 3, np.Inf, np.NaN, pd.NA, pd.NaT))
+ pi = pd.Index((1, 2, 3, np.inf, np.nan, pd.NA, pd.NaT))
  actual = isna(pi).execute().fetch()
  expected = pd.isna(pi)
  np.testing.assert_array_equal(expected, actual)
 
  # pandas series
- ps = pd.Series((1, 2, 3, np.Inf, np.NaN, pd.NA, pd.NaT))
+ ps = pd.Series((1, 2, 3, np.inf, np.nan, pd.NA, pd.NaT))
  actual = isna(ps).execute().fetch()
  expected = pd.isna(ps)
  pd.testing.assert_series_equal(expected, actual)
 
  # pandas dataframe
  pdf = pd.DataFrame(
- {"foo": (1, 2, 3, np.Inf, pd.NA), "bar": (4, 5, 6, np.NaN, pd.NaT)}
+ {"foo": (1, 2, 3, np.inf, pd.NA), "bar": (4, 5, 6, np.nan, pd.NaT)}
  )
  actual = isna(pdf).execute().fetch()
  expected = pd.isna(pdf)
@@ -324,36 +324,36 @@ def test_notna(setup):
  notna(midx)
 
  # list
- l = [1, 2, 3, np.Inf, np.NaN, pd.NA, pd.NaT]
+ l = [1, 2, 3, np.inf, np.nan, pd.NA, pd.NaT]
  actual = notna(l).execute().fetch()
  expected = pd.notna(l)
  np.testing.assert_array_equal(expected, actual)
 
  # tuple
- t = (1, 2, 3, np.Inf, np.NaN, pd.NA, pd.NaT)
+ t = (1, 2, 3, np.inf, np.nan, pd.NA, pd.NaT)
  assert notna(t)
 
  # numpy ndarray
- narr = np.array((1, 2, 3, np.Inf, np.NaN))
+ narr = np.array((1, 2, 3, np.inf, np.nan))
  actual = notna(narr).execute().fetch()
  expected = pd.notna(narr)
  np.testing.assert_array_equal(expected, actual)
 
  # pandas index
- pi = pd.Index((1, 2, 3, np.Inf, np.NaN, pd.NA, pd.NaT))
+ pi = pd.Index((1, 2, 3, np.inf, np.nan, pd.NA, pd.NaT))
  actual = notna(pi).execute().fetch()
  expected = pd.notna(pi)
  np.testing.assert_array_equal(expected, actual)
 
  # pandas series
- ps = pd.Series((1, 2, 3, np.Inf, np.NaN, pd.NA, pd.NaT))
+ ps = pd.Series((1, 2, 3, np.inf, np.nan, pd.NA, pd.NaT))
  actual = notna(ps).execute().fetch()
  expected = pd.notna(ps)
  pd.testing.assert_series_equal(expected, actual)
 
  # pandas dataframe
  pdf = pd.DataFrame(
- {"foo": (1, 2, 3, np.Inf, pd.NA), "bar": (4, 5, 6, np.NaN, pd.NaT)}
+ {"foo": (1, 2, 3, np.inf, pd.NA), "bar": (4, 5, 6, np.nan, pd.NaT)}
  )
  actual = notna(pdf).execute().fetch()
  expected = pd.notna(pdf)

diff --git a/python/xorbits/_mars/dataframe/reduction/core.py b/python/xorbits/_mars/dataframe/reduction/core.py
@@ -365,7 +365,7 @@ def _call_dataframe(self, df):
  # handle pandas Dtypes in the future more carefully.
  reduced_dtype = np.dtype("O")
  else:
- reduced_dtype = np.find_common_type(dtypes, [])
+ reduced_dtype = np.result_type(*dtypes)
 
  if level is not None:
  return self._call_groupby_level(df[reduced_cols], level)

diff --git a/python/xorbits/_mars/dataframe/statistics/corr.py b/python/xorbits/_mars/dataframe/statistics/corr.py
@@ -46,7 +46,7 @@ def _set_inputs(self, inputs):
  def __call__(self, df_or_series):
  if isinstance(df_or_series, SERIES_TYPE):
  inputs = filter_inputs([df_or_series, self.other])
- return self.new_scalar(inputs, dtype=np.dtype(np.float_))
+ return self.new_scalar(inputs, dtype=np.dtype(np.float64))
  else:
 
  def _filter_numeric(obj):
@@ -63,7 +63,7 @@ def _filter_numeric(obj):
  inputs = filter_inputs([df_or_series, self.other])
  if self.axis is None:
  dtypes = pd.Series(
- [np.dtype(np.float_)] * len(df_or_series.dtypes),
+ [np.dtype(np.float64)] * len(df_or_series.dtypes),
  index=df_or_series.dtypes.index,
  )
  return self.new_dataframe(
@@ -88,7 +88,7 @@ def _filter_numeric(obj):
  return self.new_series(
  inputs,
  shape=shape,
- dtype=np.dtype(np.float_),
+ dtype=np.dtype(np.float64),
  index_value=new_index_value,
  )
 
@@ -115,8 +115,8 @@ def _tile_pearson_cross(left, right, min_periods):
  right.fillna(0).to_tensor(),
  )
 
- nna_left = left.notna().to_tensor().astype(np.float_)
- nna_right = right.notna().to_tensor().astype(np.float_)
+ nna_left = left.notna().to_tensor().astype(np.float64)
+ nna_right = right.notna().to_tensor().astype(np.float64)
 
  sum_left = left_tensor.T.dot(nna_right)
  sum_right = right_tensor.T.dot(nna_left)
@@ -143,8 +143,8 @@ def _tile_pearson_align(cls, left, right, axis):
  if has_unknown_shape(left, right):
  yield left.chunks + right.chunks + [left, right]
 
- nna_left = left.notna().astype(np.float_)
- nna_right = right.notna().astype(np.float_)
+ nna_left = left.notna().astype(np.float64)
+ nna_right = right.notna().astype(np.float64)
 
  left, right = left.fillna(0), right.fillna(0)
 

diff --git a/python/xorbits/_mars/learn/contrib/lightgbm/_predict.py b/python/xorbits/_mars/learn/contrib/lightgbm/_predict.py
@@ -75,7 +75,7 @@ def __call__(self):
  shape = (self.data.shape[0],)
 
  if self._proba:
- dtype = np.dtype(np.float_)
+ dtype = np.dtype(np.float64)
  elif hasattr(self.model, "classes_"):
  dtype = np.array(self.model.classes_).dtype
  else:

diff --git a/python/xorbits/_mars/learn/decomposition/tests/test_truncated_svd.py b/python/xorbits/_mars/learn/decomposition/tests/test_truncated_svd.py
@@ -27,12 +27,10 @@
 shape = 60, 55
 n_samples, n_features = shape
 rng = check_random_state(42)
-X = rng.randint(-100, 20, np.product(shape)).reshape(shape)
+X = rng.randint(-100, 20, np.prod(shape)).reshape(shape)
 X = sp.csr_matrix(np.maximum(X, 0), dtype=np.float64)
 X.data[:] = 1 + np.log(X.data)
-Xdense = X.A
-n_samples = n_samples
-n_features = n_features
+Xdense = X.toarray()
 
 
 def test_attributes(setup):

diff --git a/python/xorbits/_mars/learn/utils/validation.py b/python/xorbits/_mars/learn/utils/validation.py
@@ -17,7 +17,13 @@
 import warnings
 
 import numpy as np
-from numpy.core.numeric import ComplexWarning
+
+from ....utils import is_numpy_2
+
+if is_numpy_2():
+ from numpy.exceptions import ComplexWarning
+else:
+ from numpy.core.numeric import ComplexWarning
 
 try:
  from sklearn.exceptions import DataConversionWarning

diff --git a/python/xorbits/_mars/tensor/__init__.py b/python/xorbits/_mars/tensor/__init__.py
@@ -303,9 +303,9 @@
 # noinspection PyUnresolvedReferences
 from ..core import ExecutableTuple
 
-from numpy import __version__ as np_ver
+from .utils import is_numpy_2
 
-if np_ver >= "2.0.0":
+if is_numpy_2():
  from numpy.exceptions import AxisError
 else:
  from numpy import AxisError

diff --git a/python/xorbits/_mars/tensor/arithmetic/tests/test_arithmetic.py b/python/xorbits/_mars/tensor/arithmetic/tests/test_arithmetic.py
@@ -357,7 +357,7 @@ def test_unify_chunk_add():
 
 def test_frexp():
  t1 = ones((3, 4, 5), chunk_size=2)
- t2 = empty((3, 4, 5), dtype=np.float_, chunk_size=2)
+ t2 = empty((3, 4, 5), dtype=np.float64, chunk_size=2)
  op_type = type(t1.op)
 
  o1, o2 = frexp(t1)

diff --git a/python/xorbits/_mars/tensor/array_utils.py b/python/xorbits/_mars/tensor/array_utils.py
@@ -21,6 +21,7 @@
 from ..lib import sparse
 from ..lib.sparse.core import get_dense_module, issparse
 from ..utils import is_same_module, lazy_import
+from .utils import is_numpy_2
 
 cp = lazy_import("cupy", rename="cp")
 
@@ -43,6 +44,18 @@ def is_cupy(x):
  return False
 
 
+def get_device_id(input_data):
+ # NumPy 2.x added a `device` attribute to `np.ndarray`, but
+ # `np.ndarray.device` returns `cpu`, which has no `id` attribute,
+ # whereas `cupy.ndarray.device.id` returns the GPU device id.
+ if hasattr(input_data, "device") and not (
+ is_numpy_2() and isinstance(input_data, np.ndarray)
+ ):
+ return input_data.device.id
+ else:
+ return -1  # fallback when there is no usable device id (presumably CPU/host; confirm against `device()`)
+
+
 def get_array_module(x, nosparse=False):
  if issparse(x):
  if nosparse:
@@ -117,8 +130,7 @@ def as_same_device(inputs, device=None, ret_extra=False, copy_if_not_writeable=F
  if device is None:
  try:
  device = _most_nbytes_device(
- (i.device.id if hasattr(i, "device") else -1, i.nbytes)
- for i in input_tensors
+ (get_device_id(i), i.nbytes) for i in input_tensors
  )
  except ValueError:
  device = -1

diff --git a/python/xorbits/_mars/tensor/base/broadcast_to.py b/python/xorbits/_mars/tensor/base/broadcast_to.py
@@ -17,7 +17,7 @@
 
 from ... import opcodes as OperandDef
 from ...serialization.serializables import KeyField, TupleField
-from ..array_utils import device, get_array_module
+from ..array_utils import device, get_array_module, get_device_id
 from ..datasource import tensor as astensor
 from ..operands import TensorHasInput, TensorOperandMixin
 
@@ -80,7 +80,7 @@ def tile(cls, op):
  def execute(cls, ctx, op):
  xp = get_array_module(ctx[op.input.key])
  input_data = ctx[op.input.key]
- device_id = input_data.device.id if hasattr(input_data, "device") else -1
+ device_id = get_device_id(input_data)
 
  with device(device_id):
  shape = op.shape

diff --git a/python/xorbits/_mars/tensor/base/expand_dims.py b/python/xorbits/_mars/tensor/base/expand_dims.py
@@ -16,6 +16,12 @@
 import numpy as np
 
 from ..datasource import tensor as astensor
+from ..utils import is_numpy_2
+
+if is_numpy_2():
+ from numpy.exceptions import AxisError
+else:
+ from numpy import AxisError
 
 
 def expand_dims(a, axis):
@@ -77,9 +83,7 @@ def expand_dims(a, axis):
  a = astensor(a)
 
  if axis > a.ndim or axis < -a.ndim - 1:
- raise np.AxisError(
- f"Axis must be between -{a.ndim + 1} and {a.ndim}, got {axis}"
- )
+ raise AxisError(f"Axis must be between -{a.ndim + 1} and {a.ndim}, got {axis}")
 
  axis = axis if axis >= 0 else axis + a.ndim + 1
  indexes = (slice(None),) * axis + (np.newaxis,) + (slice(None),) * (a.ndim - axis)

diff --git a/python/xorbits/_mars/tensor/base/rollaxis.py b/python/xorbits/_mars/tensor/base/rollaxis.py
@@ -13,9 +13,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import numpy as np
+from ..utils import is_numpy_2, validate_axis
 
-from ..utils import validate_axis
+if is_numpy_2():
+ from numpy.exceptions import AxisError
+else:
+ from numpy import AxisError
 
 
 def rollaxis(tensor, axis, start=0):
@@ -66,7 +69,7 @@ def rollaxis(tensor, axis, start=0):
  start += n
  msg = "'%s' arg requires %d <= %s < %d, but %d was passed in"
  if not (0 <= start < n + 1):
- raise np.AxisError(msg % ("start", -n, "start", n + 1, start))
+ raise AxisError(msg % ("start", -n, "start", n + 1, start))
  if axis < start:
  # it's been removed
  start -= 1