diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
index 1f27ffcffe3..075825e852e 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -125,5 +125,4 @@ jobs:
       branch: ${{ inputs.branch }}
       date: ${{ inputs.date }}
       sha: ${{ inputs.sha }}
-      # pr mode uses the HEAD of the branch, which is also correct for nightlies
-      script: ci/cudf_pandas_scripts/pandas-tests/run.sh pr
+      script: ci/cudf_pandas_scripts/pandas-tests/run.sh main
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 9235c80bdc9..67a71021a63 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -23,13 +23,6 @@ repos:
         args: ["--config-root=python/", "--resolve-all-configs"]
         files: python/.*
         types_or: [python, cython, pyi]
-  - repo: https://github.com/psf/black
-    rev: 23.12.1
-    hooks:
-      - id: black
-        files: python/.*
-        # Explicitly specify the pyproject.toml at the repo root, not per-project.
-        args: ["--config", "pyproject.toml"]
   - repo: https://github.com/MarcoGorelli/cython-lint
     rev: v0.16.0
     hooks:
@@ -64,9 +57,6 @@ repos:
         # Use the cudf_kafka isort orderings in notebooks so that dask
         # and RAPIDS packages have their own sections.
        args: ["--settings-file=python/cudf_kafka/pyproject.toml"]
-      - id: nbqa-black
-        # Explicitly specify the pyproject.toml at the repo root, not per-project.
-        args: ["--config=pyproject.toml"]
   - repo: https://github.com/pre-commit/mirrors-clang-format
     rev: v16.0.6
     hooks:
@@ -155,6 +145,8 @@ repos:
     hooks:
       - id: ruff
         files: python/.*$
+      - id: ruff-format
+        files: python/.*$
   - repo: https://github.com/rapidsai/pre-commit-hooks
     rev: v0.0.1
     hooks:
diff --git a/ci/cudf_pandas_scripts/pandas-tests/run.sh b/ci/cudf_pandas_scripts/pandas-tests/run.sh
index 667ca35163b..1f70ca78c41 100755
--- a/ci/cudf_pandas_scripts/pandas-tests/run.sh
+++ b/ci/cudf_pandas_scripts/pandas-tests/run.sh
@@ -32,3 +32,4 @@ python python/cudf/cudf/pandas/scripts/summarize-test-results.py --output json p
 RAPIDS_ARTIFACTS_DIR=${RAPIDS_ARTIFACTS_DIR:-"${PWD}/artifacts"}
 mkdir -p "${RAPIDS_ARTIFACTS_DIR}"
 mv pandas-testing/${PANDAS_TESTS_BRANCH}-results.json ${RAPIDS_ARTIFACTS_DIR}/
+rapids-upload-to-s3 ${RAPIDS_ARTIFACTS_DIR}/${PANDAS_TESTS_BRANCH}-results.json "${RAPIDS_ARTIFACTS_DIR}"
diff --git a/ci/test_python_other.sh b/ci/test_python_other.sh
index 8ecd02f70a1..cbc1dc1cb87 100755
--- a/ci/test_python_other.sh
+++ b/ci/test_python_other.sh
@@ -19,8 +19,8 @@ EXITCODE=0
 trap "EXITCODE=1" ERR
 set +e
 
-rapids-logger "pytest dask_cudf"
-./ci/run_dask_cudf_pytests.sh \
+rapids-logger "pytest dask_cudf (dask-expr)"
+DASK_DATAFRAME__QUERY_PLANNING=True ./ci/run_dask_cudf_pytests.sh \
   --junitxml="${RAPIDS_TESTS_DIR}/junit-dask-cudf.xml" \
   --numprocesses=8 \
   --dist=worksteal \
@@ -29,10 +29,9 @@ rapids-logger "pytest dask_cudf"
   --cov-report=xml:"${RAPIDS_COVERAGE_DIR}/dask-cudf-coverage.xml" \
   --cov-report=term
 
-# Run tests in dask_cudf/tests and dask_cudf/io/tests with dask-expr
-rapids-logger "pytest dask_cudf + dask_expr"
-DASK_DATAFRAME__QUERY_PLANNING=True ./ci/run_dask_cudf_pytests.sh \
-  --junitxml="${RAPIDS_TESTS_DIR}/junit-dask-cudf-expr.xml" \
+rapids-logger "pytest dask_cudf (legacy)"
+DASK_DATAFRAME__QUERY_PLANNING=False ./ci/run_dask_cudf_pytests.sh \
+  --junitxml="${RAPIDS_TESTS_DIR}/junit-dask-cudf-legacy.xml" \
   --numprocesses=8 \
   --dist=loadscope \
   .
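Note: the CI scripts above select between dask's two dataframe backends with the `DASK_DATAFRAME__QUERY_PLANNING` environment variable, which dask maps to the `dataframe.query-planning` config key. A minimal sketch of the same toggle from Python, for illustration only; the key must be set before `dask.dataframe` is first imported, which is why the scripts use the environment variable:

```python
# Rough Python equivalent of DASK_DATAFRAME__QUERY_PLANNING=False.
import dask

# Must happen before the first `import dask.dataframe`.
dask.config.set({"dataframe.query-planning": False})

import dask.dataframe as dd  # legacy (non-dask-expr) backend

df = dd.from_dict({"a": [1, 2, 3]}, npartitions=1)
print(df.a.sum().compute())
```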
diff --git a/ci/test_wheel_dask_cudf.sh b/ci/test_wheel_dask_cudf.sh
index af5e062a8bd..2b20b9d9ce4 100755
--- a/ci/test_wheel_dask_cudf.sh
+++ b/ci/test_wheel_dask_cudf.sh
@@ -18,19 +18,19 @@ RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${RESULTS_DIR}/test-results"}/
 mkdir -p "${RAPIDS_TESTS_DIR}"
 
 # Run tests in dask_cudf/tests and dask_cudf/io/tests
-rapids-logger "pytest dask_cudf"
+rapids-logger "pytest dask_cudf (dask-expr)"
 pushd python/dask_cudf/dask_cudf
-python -m pytest \
+DASK_DATAFRAME__QUERY_PLANNING=True python -m pytest \
   --junitxml="${RAPIDS_TESTS_DIR}/junit-dask-cudf.xml" \
   --numprocesses=8 \
   .
 popd
 
-# Run tests in dask_cudf/tests and dask_cudf/io/tests with dask-expr
-rapids-logger "pytest dask_cudf + dask_expr"
+# Run tests in dask_cudf/tests and dask_cudf/io/tests (legacy)
+rapids-logger "pytest dask_cudf (legacy)"
 pushd python/dask_cudf/dask_cudf
-DASK_DATAFRAME__QUERY_PLANNING=True python -m pytest \
-  --junitxml="${RAPIDS_TESTS_DIR}/junit-dask-cudf-expr.xml" \
+DASK_DATAFRAME__QUERY_PLANNING=False python -m pytest \
+  --junitxml="${RAPIDS_TESTS_DIR}/junit-dask-cudf-legacy.xml" \
   --numprocesses=8 \
   .
 popd
diff --git a/cpp/src/io/comp/gpuinflate.cu b/cpp/src/io/comp/gpuinflate.cu
index f29e830eb41..fff1cf0c96a 100644
--- a/cpp/src/io/comp/gpuinflate.cu
+++ b/cpp/src/io/comp/gpuinflate.cu
@@ -804,8 +804,7 @@ __device__ void process_symbols(inflate_state_s* s, int t)
       dist = symbol >> 16;
       for (int i = t; i < len; i += 32) {
         uint8_t const* src = out + ((i >= dist) ? (i % dist) : i) - dist;
-        uint8_t b          = (src < outbase) ? 0 : *src;
-        if (out + i < outend) { out[i] = b; }
+        if (out + i < outend and src >= outbase) { out[i] = *src; }
       }
       out += len;
       pos++;
diff --git a/cpp/src/io/utilities/datasource.cpp b/cpp/src/io/utilities/datasource.cpp
index d2026473b6c..54e7c6bf1d6 100644
--- a/cpp/src/io/utilities/datasource.cpp
+++ b/cpp/src/io/utilities/datasource.cpp
@@ -44,6 +44,11 @@ class file_source : public datasource {
   explicit file_source(char const* filepath) : _file(filepath, O_RDONLY)
   {
     if (detail::cufile_integration::is_kvikio_enabled()) {
+      // Workaround for https://github.com/rapidsai/cudf/issues/14140, where cuFileDriverOpen errors
+      // out if no CUDA calls have been made before it. This is a no-op if the CUDA context is
+      // already initialized
+      cudaFree(0);
+
       _kvikio_file = kvikio::FileHandle(filepath);
       CUDF_LOG_INFO("Reading a file using kvikIO, with compatibility mode {}.",
                     _kvikio_file.is_compat_mode_on() ? "on" : "off");
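Note: `cudaFree(0)` above is the standard idiom for forcing lazy CUDA context creation; freeing a null pointer is documented as a no-op, but it still initializes the runtime. A hedged sketch of the same trick from Python via `ctypes`; the library name `libcudart.so` and its presence on the loader path are assumptions about the environment:

```python
# Illustration: trigger CUDA context initialization with the cudaFree(0) idiom.
import ctypes

libcudart = ctypes.CDLL("libcudart.so")  # assumed discoverable; adjust per install
err = libcudart.cudaFree(ctypes.c_void_p(0))  # no-op free, still creates the context
assert err == 0  # cudaSuccess
```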
"on" : "off"); diff --git a/cpp/tests/error/error_handling_test.cu b/cpp/tests/error/error_handling_test.cu index 5cb2d729f3d..674d2e0a6ea 100644 --- a/cpp/tests/error/error_handling_test.cu +++ b/cpp/tests/error/error_handling_test.cu @@ -97,7 +97,8 @@ TEST(DebugAssertDeathTest, cudf_assert_false) testing::FLAGS_gtest_death_test_style = "threadsafe"; auto call_kernel = []() { - assert_false_kernel<<<1, 1>>>(); + auto const stream = cudf::get_default_stream().value(); + assert_false_kernel<<<1, 1, 0, stream>>>(); // Kernel should fail with `cudaErrorAssert` // This error invalidates the current device context, so we need to kill @@ -114,7 +115,8 @@ TEST(DebugAssertDeathTest, cudf_assert_false) TEST(DebugAssert, cudf_assert_true) { - assert_true_kernel<<<1, 1>>>(); + auto const stream = cudf::get_default_stream().value(); + assert_true_kernel<<<1, 1, 0, stream>>>(); ASSERT_EQ(cudaSuccess, cudaDeviceSynchronize()); } @@ -136,6 +138,7 @@ int main(int argc, char** argv) auto adaptor = make_stream_checking_resource_adaptor( resource, error_on_invalid_stream, check_default_stream); rmm::mr::set_current_device_resource(&adaptor); + return RUN_ALL_TESTS(); } return RUN_ALL_TESTS(); } diff --git a/pyproject.toml b/pyproject.toml index 4048eb9452c..c71394058df 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,22 +1,4 @@ -[tool.black] -line-length = 79 -target-version = ["py39"] -include = '\.py?$' -force-exclude = ''' -/( - thirdparty | - \.eggs | - \.git | - \.hg | - \.mypy_cache | - \.tox | - \.venv | - _build | - buck-out | - build | - dist -)/ -''' +# Copyright (c) 2019-2024, NVIDIA CORPORATION. [tool.pydocstyle] # Due to https://github.com/PyCQA/pydocstyle/issues/363, we must exclude rather @@ -60,13 +42,15 @@ select = ["E", "F", "W"] ignore = [ # whitespace before : "E203", + # line-too-long (due to Copyright header) + "E501", ] fixable = ["ALL"] exclude = [ # TODO: Remove this in a follow-up where we fix __all__. "__init__.py", ] -line-length = 88 +line-length = 79 [tool.ruff.per-file-ignores] # Lots of pytest implicitly injected attributes in conftest-patch.py diff --git a/python/cudf/cudf/core/_internals/timezones.py b/python/cudf/cudf/core/_internals/timezones.py index 053425fff8d..4e2fad08d56 100644 --- a/python/cudf/cudf/core/_internals/timezones.py +++ b/python/cudf/cudf/core/_internals/timezones.py @@ -85,8 +85,9 @@ def _read_tzfile_as_frame(tzdir, zone_name): if not transition_times_and_offsets: # this happens for UTC-like zones min_date = np.int64(np.iinfo("int64").min + 1).astype("M8[s]") - transition_times_and_offsets = as_column([min_date]), as_column( - [np.timedelta64(0, "s")] + transition_times_and_offsets = ( + as_column([min_date]), + as_column([np.timedelta64(0, "s")]), ) return DataFrame._from_data( diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index 3e0ec4b5cd7..f13d8cf12f7 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -1731,7 +1731,8 @@ def as_column( If None (default), treats NaN values in arbitrary as null if there is no mask passed along with it. If True, combines the mask and NaNs to form a new validity mask. If False, leaves NaN values as is. - Only applies when arbitrary is not a cudf object (Index, Series, Column). + Only applies when arbitrary is not a cudf object + (Index, Series, Column). dtype : optional Optionally typecast the constructed Column to the given dtype. 
diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 0440512c467..35588725655 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -470,9 +470,12 @@ class _DataFrameIlocIndexer(_DataFrameIndexer):
     _frame: DataFrame
 
     def __getitem__(self, arg):
-        row_key, (
-            col_is_scalar,
-            column_names,
+        (
+            row_key,
+            (
+                col_is_scalar,
+                column_names,
+            ),
         ) = indexing_utils.destructure_dataframe_iloc_indexer(arg, self._frame)
         row_spec = indexing_utils.parse_row_iloc_indexer(
             row_key, len(self._frame)
@@ -6901,16 +6904,18 @@ def stack(self, level=-1, dropna=no_default, future_stack=False):
         if future_stack:
             if dropna is not no_default:
                 raise ValueError(
-                    "dropna must be unspecified with future_stack=True as the new "
-                    "implementation does not introduce rows of NA values. This "
-                    "argument will be removed in a future version of cudf."
+                    "dropna must be unspecified with future_stack=True as "
+                    "the new implementation does not introduce rows of NA "
+                    "values. This argument will be removed in a future "
+                    "version of cudf."
                 )
         else:
             if dropna is not no_default or self._data.nlevels > 1:
                 warnings.warn(
-                    "The previous implementation of stack is deprecated and will be "
-                    "removed in a future version of cudf. Specify future_stack=True "
-                    "to adopt the new implementation and silence this warning.",
+                    "The previous implementation of stack is deprecated and "
+                    "will be removed in a future version of cudf. Specify "
+                    "future_stack=True to adopt the new implementation and "
+                    "silence this warning.",
                     FutureWarning,
                 )
             if dropna is no_default:
@@ -7028,9 +7033,13 @@ def unnamed_group_generator():
                         unique_named_levels, axis=0, fill_value=-1
                     ).values
                 else:
-                    yield grpdf.reindex(
-                        unique_named_levels, axis=0, fill_value=-1
-                    ).sort_index().values
+                    yield (
+                        grpdf.reindex(
+                            unique_named_levels, axis=0, fill_value=-1
+                        )
+                        .sort_index()
+                        .values
+                    )
             else:
                 if future_stack:
                     yield column_idx_df.values
diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py
index e5030eb634b..945e546af1a 100644
--- a/python/cudf/cudf/core/groupby/groupby.py
+++ b/python/cudf/cudf/core/groupby/groupby.py
@@ -22,7 +22,12 @@
 from cudf._lib.types import size_type_dtype
 from cudf._typing import AggType, DataFrameOrSeries, MultiColumnAggType
 from cudf.api.extensions import no_default
-from cudf.api.types import is_bool_dtype, is_float_dtype, is_list_like
+from cudf.api.types import (
+    is_bool_dtype,
+    is_float_dtype,
+    is_list_like,
+    is_numeric_dtype,
+)
 from cudf.core._compat import PANDAS_LT_300
 from cudf.core.abc import Serializable
 from cudf.core.column.column import ColumnBase, StructDtype, as_column
@@ -282,9 +287,12 @@ def __iter__(self):
         if isinstance(group_names, cudf.BaseIndex):
             group_names = group_names.to_pandas()
         for i, name in enumerate(group_names):
-            yield (name,) if isinstance(self._by, list) and len(
-                self._by
-            ) == 1 else name, grouped_values[offsets[i] : offsets[i + 1]]
+            yield (
+                (name,)
+                if isinstance(self._by, list) and len(self._by) == 1
+                else name,
+                grouped_values[offsets[i] : offsets[i + 1]],
+            )
 
     @property
     def dtypes(self):
@@ -698,6 +706,11 @@ def agg(self, func):
 
         return result
 
+    def _reduce_numeric_only(self, op: str):
+        raise NotImplementedError(
+            f"numeric_only is not implemented for {type(self)}"
+        )
+
     def _reduce(
         self,
         op: str,
@@ -728,14 +741,12 @@ def _reduce(
         The numeric_only, min_count
         """
-        if numeric_only:
-            raise NotImplementedError(
-                "numeric_only parameter is not implemented yet"
-            )
         if min_count != 0:
             raise NotImplementedError(
                 "min_count parameter is not implemented yet"
             )
+        if numeric_only:
+            return self._reduce_numeric_only(op)
         return self.agg(op)
 
     def _scan(self, op: str, *args, **kwargs):
@@ -2269,8 +2280,8 @@ def fillna(
         """
         warnings.warn(
             "groupby fillna is deprecated and "
-            "will be removed in a future version. Use groupby ffill or groupby bfill "
-            "for forward or backward filling instead.",
+            "will be removed in a future version. Use groupby ffill "
+            "or groupby bfill for forward or backward filling instead.",
             FutureWarning,
         )
         if inplace:
@@ -2645,6 +2656,17 @@ class DataFrameGroupBy(GroupBy, GetAttrGetItemMixin):
 
     _PROTECTED_KEYS = frozenset(("obj",))
 
+    def _reduce_numeric_only(self, op: str):
+        columns = list(
+            name
+            for name in self.obj._data.names
+            if (
+                is_numeric_dtype(self.obj._data[name].dtype)
+                and name not in self.grouping.names
+            )
+        )
+        return self[columns].agg(op)
+
     def __getitem__(self, key):
         return self.obj[key].groupby(
             by=self.grouping.keys,
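Note: the new `DataFrameGroupBy._reduce_numeric_only` above selects the numeric, non-key columns and delegates to `agg`, so simple reductions now accept `numeric_only=True` instead of raising `NotImplementedError`. Usage, grounded in the test added later in this diff:

```python
import cudf

df = cudf.DataFrame({"id": [0, 0, 1], "a": [1, 2, 3], "b": ["A", "B", "C"]})
# Reduces column "a" only; the string column "b" is excluded from the result.
print(df.groupby("id").sum(numeric_only=True))
```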
diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py
index 94d862d52b4..ca9d5590044 100644
--- a/python/cudf/cudf/core/indexed_frame.py
+++ b/python/cudf/cudf/core/indexed_frame.py
@@ -211,8 +211,8 @@ def _get_label_range_or_mask(index, start, stop, step):
             return slice(start_loc, stop_loc)
         else:
             raise KeyError(
-                "Value based partial slicing on non-monotonic DatetimeIndexes "
-                "with non-existing keys is not allowed.",
+                "Value based partial slicing on non-monotonic "
+                "DatetimeIndexes with non-existing keys is not allowed.",
             )
     elif start is not None:
         boolean_mask = index >= start
@@ -2449,7 +2449,8 @@ def squeeze(self, axis: Literal["index", "columns", 0, 1, None] = None):
         ----------
         axis : {0 or 'index', 1 or 'columns', None}, default None
             A specific axis to squeeze. By default, all length-1 axes are
-            squeezed. For `Series` this parameter is unused and defaults to `None`.
+            squeezed. For `Series` this parameter is unused and defaults
+            to `None`.
 
         Returns
         -------
@@ -5835,9 +5836,7 @@ def floordiv(self, other, axis, level=None, fill_value=None):  # noqa: D102
             ),
         )
     )
-    def rfloordiv(
-        self, other, axis, level=None, fill_value=None
-    ):  # noqa: D102
+    def rfloordiv(self, other, axis, level=None, fill_value=None):  # noqa: D102
         if level is not None:
             raise NotImplementedError("level parameter is not supported yet.")
 
@@ -5967,9 +5966,7 @@ def rtruediv(self, other, axis, level=None, fill_value=None):  # noqa: D102
             ),
         )
     )
-    def eq(
-        self, other, axis="columns", level=None, fill_value=None
-    ):  # noqa: D102
+    def eq(self, other, axis="columns", level=None, fill_value=None):  # noqa: D102
         return self._binaryop(
             other=other, op="__eq__", fill_value=fill_value, can_reindex=True
         )
@@ -6009,9 +6006,7 @@ def eq(
             ),
         )
     )
-    def ne(
-        self, other, axis="columns", level=None, fill_value=None
-    ):  # noqa: D102
+    def ne(self, other, axis="columns", level=None, fill_value=None):  # noqa: D102
         return self._binaryop(
             other=other, op="__ne__", fill_value=fill_value, can_reindex=True
         )
@@ -6051,9 +6046,7 @@ def ne(
             ),
         )
     )
-    def lt(
-        self, other, axis="columns", level=None, fill_value=None
-    ):  # noqa: D102
+    def lt(self, other, axis="columns", level=None, fill_value=None):  # noqa: D102
         return self._binaryop(
             other=other, op="__lt__", fill_value=fill_value, can_reindex=True
         )
@@ -6093,9 +6086,7 @@ def lt(
             ),
         )
     )
-    def le(
-        self, other, axis="columns", level=None, fill_value=None
-    ):  # noqa: D102
+    def le(self, other, axis="columns", level=None, fill_value=None):  # noqa: D102
         return self._binaryop(
             other=other, op="__le__", fill_value=fill_value, can_reindex=True
         )
@@ -6135,9 +6126,7 @@ def le(
             ),
         )
     )
-    def gt(
-        self, other, axis="columns", level=None, fill_value=None
-    ):  # noqa: D102
+    def gt(self, other, axis="columns", level=None, fill_value=None):  # noqa: D102
         return self._binaryop(
             other=other, op="__gt__", fill_value=fill_value, can_reindex=True
         )
@@ -6177,9 +6166,7 @@ def gt(
             ),
         )
     )
-    def ge(
-        self, other, axis="columns", level=None, fill_value=None
-    ):  # noqa: D102
+    def ge(self, other, axis="columns", level=None, fill_value=None):  # noqa: D102
         return self._binaryop(
             other=other, op="__ge__", fill_value=fill_value, can_reindex=True
         )
diff --git a/python/cudf/cudf/core/tools/datetimes.py b/python/cudf/cudf/core/tools/datetimes.py
index d182b7b4a7c..65f97c99934 100644
--- a/python/cudf/cudf/core/tools/datetimes.py
+++ b/python/cudf/cudf/core/tools/datetimes.py
@@ -164,9 +164,9 @@ def to_datetime(
 
     if errors == "ignore":
         warnings.warn(
-            "errors='ignore' is deprecated and will raise in a future version. "
-            "Use to_datetime without passing `errors` and catch exceptions "
-            "explicitly instead",
+            "errors='ignore' is deprecated and will raise in a "
+            "future version. Use to_datetime without passing `errors` "
+            "and catch exceptions explicitly instead",
             FutureWarning,
         )
 
diff --git a/python/cudf/cudf/core/tools/numeric.py b/python/cudf/cudf/core/tools/numeric.py
index e1424459c8f..68b23f1e059 100644
--- a/python/cudf/cudf/core/tools/numeric.py
+++ b/python/cudf/cudf/core/tools/numeric.py
@@ -97,9 +97,9 @@ def to_numeric(arg, errors="raise", downcast=None):
         raise ValueError("invalid error value specified")
     elif errors == "ignore":
         warnings.warn(
-            "errors='ignore' is deprecated and will raise in a future version. "
-            "Use to_numeric without passing `errors` and catch exceptions "
-            "explicitly instead",
+            "errors='ignore' is deprecated and will raise in "
+            "a future version. Use to_numeric without passing `errors` "
+            "and catch exceptions explicitly instead",
             FutureWarning,
         )
 
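Note: both deprecation warnings above point at the same migration: stop passing `errors="ignore"` and handle failures explicitly. A sketch of the recommended pattern; the exact exception types raised for unparsable input are an assumption here, not part of this diff:

```python
# Replacement for cudf.to_numeric(data, errors="ignore"), which returned the
# input unchanged on failure.
import cudf

data = cudf.Series(["1", "2", "not-a-number"])
try:
    result = cudf.to_numeric(data)
except (ValueError, TypeError):  # assumed failure modes, for illustration
    result = data  # keep the original values, as errors="ignore" did
```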
diff --git a/python/cudf/cudf/core/udf/strings_lowering.py b/python/cudf/cudf/core/udf/strings_lowering.py
index fdce404d887..3c02ee52b25 100644
--- a/python/cudf/cudf/core/udf/strings_lowering.py
+++ b/python/cudf/cudf/core/udf/strings_lowering.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2022-2023, NVIDIA CORPORATION.
+# Copyright (c) 2022-2024, NVIDIA CORPORATION.
 
 import operator
 from functools import partial
@@ -249,7 +249,7 @@ def replace_impl(context, builder, sig, args):
     replacement_ptr = builder.alloca(args[2].type)
 
     builder.store(args[0], src_ptr)
-    builder.store(args[1], to_replace_ptr),
+    builder.store(args[1], to_replace_ptr)
     builder.store(args[2], replacement_ptr)
 
     udf_str_ptr = builder.alloca(default_manager[udf_string].get_value_type())
diff --git a/python/cudf/cudf/core/udf/utils.py b/python/cudf/cudf/core/udf/utils.py
index 12baf1ea6d1..bc1f4f2557e 100644
--- a/python/cudf/cudf/core/udf/utils.py
+++ b/python/cudf/cudf/core/udf/utils.py
@@ -41,9 +41,7 @@ from cudf.utils.utils import initfunc
 
 # Maximum size of a string column is 2 GiB
-_STRINGS_UDF_DEFAULT_HEAP_SIZE = os.environ.get(
-    "STRINGS_UDF_HEAP_SIZE", 2**31
-)
+_STRINGS_UDF_DEFAULT_HEAP_SIZE = os.environ.get("STRINGS_UDF_HEAP_SIZE", 2**31)
 _heap_size = 0
 
 _cudf_str_dtype = dtype(str)
diff --git a/python/cudf/cudf/pandas/fast_slow_proxy.py b/python/cudf/cudf/pandas/fast_slow_proxy.py
index 3f5df18eae1..e811ba1351a 100644
--- a/python/cudf/cudf/pandas/fast_slow_proxy.py
+++ b/python/cudf/cudf/pandas/fast_slow_proxy.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2023-2024, NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: Copyright (c) 2023-2024, NVIDIA CORPORATION & AFFILIATES.  # noqa: E501
 # All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 
@@ -437,9 +437,7 @@ def __get__(self, obj, owner=None) -> Any:
             # methods because dir for the method won't be the same as for
             # the pure unbound function, but the alternative is
             # materializing the slow object when we don't really want to.
-            result._fsproxy_slow_dir = dir(
-                slow_result_type
-            )  # type: ignore
+            result._fsproxy_slow_dir = dir(slow_result_type)  # type: ignore
             return result
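Note: the one-character fix in `strings_lowering.py` above (dropping the stray comma after `builder.store(args[1], to_replace_ptr)`) is worth calling out: a trailing comma turns a Python expression statement into a one-element tuple. Here the tuple was built and discarded, so behavior was unaffected, but the same slip bites hard when the value is kept. A self-contained illustration:

```python
def compute():
    return 42


a = compute()   # int: 42
b = compute(),  # stray comma builds a 1-tuple: (42,)
assert a == 42
assert b == (42,)
```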
diff --git a/python/cudf/cudf/pandas/profiler.py b/python/cudf/cudf/pandas/profiler.py
index c5662d06e09..0124d411e3b 100644
--- a/python/cudf/cudf/pandas/profiler.py
+++ b/python/cudf/cudf/pandas/profiler.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-FileCopyrightText: Copyright (c) 2023-2024, NVIDIA CORPORATION & AFFILIATES.
 # All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 
@@ -124,7 +124,7 @@ def get_namespaced_function_name(
         _MethodProxy,
         type[_FinalProxy],
         type[_IntermediateProxy],
-    ]
+    ],
 ):
     if isinstance(func_obj, _MethodProxy):
         # Extract classname from method object
@@ -177,17 +177,15 @@ def _tracefunc(self, frame, event, arg):
             if self._currkey is not None and arg is not None:
                 if arg[1]:  # fast
                     run_time = time.perf_counter() - self._timer[self._currkey]
-                    self._results[self._currkey][
-                        "gpu_time"
-                    ] = run_time + self._results[self._currkey].get(
-                        "gpu_time", 0
+                    self._results[self._currkey]["gpu_time"] = (
+                        run_time
+                        + self._results[self._currkey].get("gpu_time", 0)
                     )
                 else:
                     run_time = time.perf_counter() - self._timer[self._currkey]
-                    self._results[self._currkey][
-                        "cpu_time"
-                    ] = run_time + self._results[self._currkey].get(
-                        "cpu_time", 0
+                    self._results[self._currkey]["cpu_time"] = (
+                        run_time
+                        + self._results[self._currkey].get("cpu_time", 0)
                    )
 
             frame_locals = inspect.getargvalues(frame).locals
diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index e034a3f5e10..ead1ab2da6c 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -2351,7 +2351,7 @@ def test_dataframe_reductions(data, axis, func, skipna):
     for kwargs in all_kwargs:
         if expected_exception is not None:
             with pytest.raises(expected_exception):
-                getattr(gdf, func)(axis=axis, skipna=skipna, **kwargs),
+                (getattr(gdf, func)(axis=axis, skipna=skipna, **kwargs),)
         else:
             expect = getattr(pdf, func)(axis=axis, skipna=skipna, **kwargs)
             with expect_warning_if(
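Note: the reformatted `_tracefunc` above accumulates per-key GPU/CPU timings rather than overwriting them, with `dict.get(key, 0)` supplying the zero the first time a key is seen. A minimal sketch of that accumulate-don't-overwrite pattern; the names here are illustrative, not the profiler's actual internals:

```python
import time

results: dict[str, dict[str, float]] = {}


def record(key: str, kind: str, started: float) -> None:
    run_time = time.perf_counter() - started
    bucket = results.setdefault(key, {})
    bucket[kind] = run_time + bucket.get(kind, 0)  # add to any prior total


t0 = time.perf_counter()
record("Series.sum", "gpu_time", t0)
record("Series.sum", "gpu_time", t0)  # totals accumulate across calls
```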
diff --git a/python/cudf/cudf/tests/test_groupby.py b/python/cudf/cudf/tests/test_groupby.py
index 06516b6b4ea..c139b06d20f 100644
--- a/python/cudf/cudf/tests/test_groupby.py
+++ b/python/cudf/cudf/tests/test_groupby.py
@@ -1259,7 +1259,7 @@ def test_groupby_unsupported_columns():
     pdg = pdf.groupby("x").sum(numeric_only=True)
     # cudf does not yet support numeric_only, so our default is False (unlike
     # pandas, which defaults to inferring and throws a warning about it).
-    gdg = gdf.groupby("x").sum()
+    gdg = gdf.groupby("x").sum(numeric_only=True)
     assert_groupby_results_equal(pdg, gdg)
 
 
@@ -2158,7 +2158,9 @@ def test_groupby_list_columns_excluded():
     pandas_agg_result = pdf.groupby("a").agg("mean", numeric_only=True)
 
     assert_groupby_results_equal(
-        pandas_result, gdf.groupby("a").mean(), check_dtype=False
+        pandas_result,
+        gdf.groupby("a").mean(numeric_only=True),
+        check_dtype=False,
     )
 
     assert_groupby_results_equal(
@@ -3826,3 +3828,27 @@ def test_groupby_shift_series_multiindex():
     result = ser.groupby(level=0).shift(1)
     expected = ser.to_pandas().groupby(level=0).shift(1)
     assert_eq(expected, result)
+
+
+@pytest.mark.parametrize(
+    "func", ["min", "max", "sum", "mean", "idxmin", "idxmax"]
+)
+@pytest.mark.parametrize(
+    "by,data",
+    [
+        ("a", {"a": [1, 2, 3]}),
+        (["a", "id"], {"id": [0, 0, 1], "a": [1, 2, 3]}),
+        ("a", {"a": [1, 2, 3], "b": ["A", "B", "C"]}),
+        ("id", {"id": [0, 0, 1], "a": [1, 2, 3], "b": ["A", "B", "C"]}),
+        (["b", "id"], {"id": [0, 0, 1], "b": ["A", "B", "C"]}),
+        ("b", {"b": ["A", "B", "C"]}),
+    ],
+)
+def test_group_by_reduce_numeric_only(by, data, func):
+    # Test that simple groupby reductions support numeric_only=True
+    df = cudf.DataFrame(data)
+    expected = getattr(df.to_pandas().groupby(by, sort=True), func)(
+        numeric_only=True
+    )
+    result = getattr(df.groupby(by, sort=True), func)(numeric_only=True)
+    assert_eq(expected, result)
diff --git a/python/cudf/cudf/tests/test_index.py b/python/cudf/cudf/tests/test_index.py
index 51e9a3022f4..05213d7601c 100644
--- a/python/cudf/cudf/tests/test_index.py
+++ b/python/cudf/cudf/tests/test_index.py
@@ -1721,8 +1721,7 @@ def test_get_indexer_single_unique_numeric(idx, key, method):
 
     if (
         # `method` only applicable to monotonic index
-        not pi.is_monotonic_increasing
-        and method is not None
+        not pi.is_monotonic_increasing and method is not None
     ):
         assert_exceptions_equal(
             lfunc=pi.get_loc,
diff --git a/python/cudf/cudf/tests/test_orc.py b/python/cudf/cudf/tests/test_orc.py
index 69ddd936eee..a9bca7d8b98 100644
--- a/python/cudf/cudf/tests/test_orc.py
+++ b/python/cudf/cudf/tests/test_orc.py
@@ -608,7 +608,8 @@ def test_orc_write_statistics(tmpdir, datadir, nrows, stats_freq):
     from pyarrow import orc
 
     supported_stat_types = supported_numpy_dtypes + ["str"]
-    # Writing bool columns to multiple row groups is disabled until #6763 is fixed
+    # Writing bool columns to multiple row groups is disabled
+    # until #6763 is fixed
     if nrows == 100000:
         supported_stat_types.remove("bool")
 
@@ -683,7 +684,8 @@ def test_orc_chunked_write_statistics(tmpdir, datadir, nrows, stats_freq):
     np.random.seed(0)
     supported_stat_types = supported_numpy_dtypes + ["str"]
-    # Writing bool columns to multiple row groups is disabled until #6763 is fixed
+    # Writing bool columns to multiple row groups is disabled
+    # until #6763 is fixed
     if nrows == 200000:
         supported_stat_types.remove("bool")
 
@@ -697,8 +699,7 @@ def test_orc_chunked_write_statistics(tmpdir, datadir, nrows, stats_freq):
     # Make a dataframe
     gdf = cudf.DataFrame(
         {
-            "col_"
-            + str(dtype): gen_rand_series(
+            "col_" + str(dtype): gen_rand_series(
                 dtype,
                 nrows // 2,
                 has_nulls=True,
@@ -716,8 +717,7 @@ def test_orc_chunked_write_statistics(tmpdir, datadir, nrows, stats_freq):
     # write and no pointers are saved into the original table
     gdf = cudf.DataFrame(
         {
-            "col_"
-            + str(dtype): gen_rand_series(
+            "col_" + str(dtype): gen_rand_series(
                 dtype,
                 nrows // 2,
                 has_nulls=True,
diff --git a/python/cudf/cudf/tests/test_parquet.py b/python/cudf/cudf/tests/test_parquet.py
index 18efd4417a1..8b72fe84359 100644
--- a/python/cudf/cudf/tests/test_parquet.py
+++ b/python/cudf/cudf/tests/test_parquet.py
@@ -1087,8 +1087,9 @@ def struct_gen(gen, skip_rows, num_rows, include_validity=False):
     def R(first_val, num_fields):
         return {
-            "col"
-            + str(f): (gen[f](first_val, first_val) if f % 4 != 0 else None)
+            "col" + str(f): (
+                gen[f](first_val, first_val) if f % 4 != 0 else None
+            )
             if include_validity
             else (gen[f](first_val, first_val))
             for f in range(len(gen))
diff --git a/python/cudf/cudf/utils/ioutils.py b/python/cudf/cudf/utils/ioutils.py
index 925fd24e6c8..85abf438efb 100644
--- a/python/cudf/cudf/utils/ioutils.py
+++ b/python/cudf/cudf/utils/ioutils.py
@@ -85,9 +85,7 @@
 0 10 hello
 1 20 rapids
 2 30 ai
-""".format(
-    remote_data_sources=_docstring_remote_sources
-)
+""".format(remote_data_sources=_docstring_remote_sources)
 doc_read_avro = docfmt_partial(docstring=_docstring_read_avro)
 
 _docstring_read_parquet_metadata = """
@@ -1416,9 +1414,7 @@
     list of Filepath strings or in-memory buffers of data.
 compression : str
     Type of compression algorithm for the content
-""".format(
-    bytes_per_thread=_BYTES_PER_THREAD_DEFAULT
-)
+""".format(bytes_per_thread=_BYTES_PER_THREAD_DEFAULT)
 doc_get_reader_filepath_or_buffer = docfmt_partial(