diff --git a/.clang-format b/.clang-format index 9448dc8d8c80d..abd823c103904 100644 --- a/.clang-format +++ b/.clang-format @@ -19,3 +19,4 @@ BasedOnStyle: Google ColumnLimit: 90 DerivePointerAlignment: false IncludeBlocks: Preserve +IndentPPDirectives: AfterHash diff --git a/.dockerignore b/.dockerignore index 3791cca95e3fe..1f1715d8e833d 100644 --- a/.dockerignore +++ b/.dockerignore @@ -27,11 +27,11 @@ # include explicitly !ci/** !c_glib/Gemfile -!dev/archery/setup.py !dev/release/setup-*.sh !docs/requirements*.txt +!go/go.mod +!go/go.sum !python/requirements*.txt -!python/manylinux1/** !r/DESCRIPTION !ruby/Gemfile !ruby/red-arrow/Gemfile @@ -46,20 +46,3 @@ !ruby/red-parquet/Gemfile !ruby/red-parquet/lib/parquet/version.rb !ruby/red-parquet/red-parquet.gemspec -!ruby/red-plasma/Gemfile -!ruby/red-plasma/lib/plasma/version.rb -!ruby/red-plasma/red-plasma.gemspec -!rust/Cargo.toml -!rust/benchmarks/Cargo.toml -!rust/arrow/Cargo.toml -!rust/arrow/benches -!rust/arrow-flight/Cargo.toml -!rust/parquet/Cargo.toml -!rust/parquet/build.rs -!rust/parquet_derive/Cargo.toml -!rust/parquet_derive_test/Cargo.toml -!rust/datafusion/Cargo.toml -!rust/datafusion/benches -!rust/integration-testing/Cargo.toml -!go/go.mod -!go/go.sum \ No newline at end of file diff --git a/.env b/.env index be35921f94c3a..f41a142490716 100644 --- a/.env +++ b/.env @@ -58,10 +58,8 @@ CUDA=11.2.2 DASK=latest DOTNET=8.0 GCC_VERSION="" -GO=1.21.8 -STATICCHECK=v0.4.7 HDFS=3.2.1 -JDK=8 +JDK=11 KARTOTHEK=latest # LLVM 12 and GCC 11 reports -Wmismatched-new-delete. LLVM=14 @@ -70,7 +68,9 @@ NODE=18 NUMBA=latest NUMPY=latest PANDAS=latest -PYTHON=3.8 +PYTHON=3.9 +PYTHON_IMAGE_TAG=3.9 +PYTHON_ABI_TAG=cp39 R=4.4 SPARK=master TURBODBC=latest @@ -95,7 +95,7 @@ VCPKG="943c5ef1c8f6b5e6ced092b242c8299caae2ff01" # 2024.04.26 Release # ci/docker/python-wheel-windows-vs2019.dockerfile. # This is a workaround for our CI problem that "archery docker build" doesn't # use pulled built images in dev/tasks/python-wheels/github.windows.yml. -PYTHON_WHEEL_WINDOWS_IMAGE_REVISION=2024-06-18 +PYTHON_WHEEL_WINDOWS_IMAGE_REVISION=2024-08-06 # Use conanio/${CONAN_BASE}:{CONAN_VERSION} for "docker-compose run --rm conan". # See https://github.com/conan-io/conan-docker-tools#readme and diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index e495bfd147de6..793dbb3806f80 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -40,7 +40,7 @@ /matlab/ @kevingurney @kou @sgilmore10 /python/pyarrow/_flight.pyx @lidavidm /python/pyarrow/**/*gandiva* @wjones127 -/r/ @paleolimbot @thisisnic +/r/ @jonkeane @thisisnic /ruby/ @kou /swift/ @kou @@ -53,7 +53,7 @@ # *.txt # PR CI and repository files -/.github/ @assignUser @kou @raulcd +/.github/ @assignUser @jonkeane @kou @raulcd .asf.yaml @assignUser @kou @raulcd .pre-commit-config.yaml @raulcd .travis.yml @assignUser @kou @raulcd @@ -61,11 +61,11 @@ appveyor.yml @assignUser @kou @raulcd # .git* # release scripts, archery etc. 
-/ci/ @assignUser @kou @raulcd -/dev/ @assignUser @kou @raulcd +/ci/ @assignUser @jonkeane @kou @raulcd +/dev/ @assignUser @jonkeane @kou @raulcd .dockerignore @raulcd -.env @assignUser @kou @raulcd -docker-compose.yml @assignUser @kou @raulcd +.env @assignUser @jonkeane @kou @raulcd +docker-compose.yml @assignUser @jonkeane @kou @raulcd # R specific packaging tooling /r/configure* @assignUser diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 7d9ff2f42e887..7ba9744ef005d 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -24,13 +24,6 @@ updates: commit-message: prefix: "MINOR: [CI] " open-pull-requests-limit: 10 - - package-ecosystem: "gomod" - directory: "/go/" - schedule: - interval: "weekly" - commit-message: - prefix: "MINOR: [Go] " - open-pull-requests-limit: 10 - package-ecosystem: "maven" directory: "/java/" schedule: diff --git a/.github/workflows/archery.yml b/.github/workflows/archery.yml index c698baba2c816..e448209056d78 100644 --- a/.github/workflows/archery.yml +++ b/.github/workflows/archery.yml @@ -20,12 +20,14 @@ name: Archery & Crossbow on: push: paths: + - '.dockerignore' - '.github/workflows/archery.yml' - 'dev/archery/**' - 'dev/tasks/**' - 'docker-compose.yml' pull_request: paths: + - '.dockerignore' - '.github/workflows/archery.yml' - 'dev/archery/**' - 'dev/tasks/**' @@ -34,7 +36,6 @@ on: env: ARCHERY_DEBUG: 1 ARCHERY_DEFAULT_BRANCH: ${{ github.event.repository.default_branch }} - ARCHERY_USE_DOCKER_CLI: 1 concurrency: group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }} @@ -59,7 +60,7 @@ jobs: shell: bash run: git branch $ARCHERY_DEFAULT_BRANCH origin/$ARCHERY_DEFAULT_BRANCH || true - name: Setup Python - uses: actions/setup-python@v5.1.0 + uses: actions/setup-python@v5.2.0 with: python-version: '3.9' - name: Install pygit2 binary wheel diff --git a/.github/workflows/comment_bot.yml b/.github/workflows/comment_bot.yml index 6b9a9256a5290..2306ed6db0dc9 100644 --- a/.github/workflows/comment_bot.yml +++ b/.github/workflows/comment_bot.yml @@ -41,7 +41,7 @@ jobs: # fetch the tags for version number generation fetch-depth: 0 - name: Set up Python - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0 + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 with: python-version: 3.12 - name: Install Archery and Crossbow dependencies diff --git a/.github/workflows/cpp.yml b/.github/workflows/cpp.yml index e539fadb859fe..f5c8b6a7201be 100644 --- a/.github/workflows/cpp.yml +++ b/.github/workflows/cpp.yml @@ -20,6 +20,7 @@ name: C++ on: push: paths: + - '.dockerignore' - '.github/workflows/cpp.yml' - 'ci/conda_env_*' - 'ci/docker/**' @@ -35,6 +36,7 @@ on: - 'testing' pull_request: paths: + - '.dockerignore' - '.github/workflows/cpp.yml' - 'ci/conda_env_*' - 'ci/docker/**' @@ -99,7 +101,6 @@ jobs: cat <> "$GITHUB_OUTPUT" { "arch": "arm64v8", - "archery-use-docker-cli": "0", "clang-tools": "10", "image": "ubuntu-cpp", "llvm": "10", @@ -124,9 +125,6 @@ jobs: include: ${{ fromJson(needs.docker-targets.outputs.targets) }} env: ARCH: ${{ matrix.arch }} - # By default, use Docker CLI because docker-compose v1 is obsolete, - # except where the Docker client version is too old. 
- ARCHERY_USE_DOCKER_CLI: ${{ matrix.archery-use-docker-cli || '1' }} ARROW_SIMD_LEVEL: ${{ matrix.simd-level }} CLANG_TOOLS: ${{ matrix.clang-tools }} LLVM: ${{ matrix.llvm }} @@ -147,6 +145,7 @@ jobs: run: | sudo apt update sudo apt install -y --no-install-recommends python3 python3-dev python3-pip + python3 -m pip install -U pip - name: Setup Archery run: python3 -m pip install -e dev/archery[docker] - name: Execute Docker Build @@ -156,8 +155,7 @@ jobs: run: | # GH-40558: reduce ASLR to avoid ASAN/LSAN crashes sudo sysctl -w vm.mmap_rnd_bits=28 - sudo sysctl -w kernel.core_pattern="core.%e.%p" - ulimit -c unlimited + source ci/scripts/util_enable_core_dumps.sh archery docker run ${{ matrix.image }} - name: Docker Push if: >- @@ -189,7 +187,7 @@ jobs: - name: Run minimal example run: | cd cpp/examples/minimal_build - docker-compose run --rm minimal + docker compose run --rm minimal macos: name: ${{ matrix.architecture }} macOS ${{ matrix.macos-version }} C++ @@ -246,7 +244,7 @@ jobs: $(brew --prefix bash)/bin/bash \ ci/scripts/install_minio.sh latest ${ARROW_HOME} - name: Set up Python - uses: actions/setup-python@v5.1.0 + uses: actions/setup-python@v5.2.0 with: python-version: 3.12 - name: Install Google Cloud Storage Testbench @@ -273,7 +271,7 @@ jobs: shell: bash run: | sudo sysctl -w kern.coredump=1 - sudo sysctl -w kern.corefile=core.%N.%P + sudo sysctl -w kern.corefile=/tmp/core.%N.%P ulimit -c unlimited # must enable within the same shell ci/scripts/cpp_test.sh $(pwd) $(pwd)/build @@ -412,12 +410,10 @@ jobs: ARROW_WITH_SNAPPY: ON ARROW_WITH_ZLIB: ON ARROW_WITH_ZSTD: ON - # Don't use preinstalled Boost by empty BOOST_ROOT and - # -DBoost_NO_BOOST_CMAKE=ON + # Don't use preinstalled Boost by empty BOOST_ROOT BOOST_ROOT: "" ARROW_CMAKE_ARGS: >- -DARROW_PACKAGE_PREFIX=/${{ matrix.msystem_lower}} - -DBoost_NO_BOOST_CMAKE=ON -DCMAKE_FIND_PACKAGE_PREFER_CONFIG=ON # We can't use unity build because we don't have enough memory on # GitHub Actions. 
@@ -467,16 +463,18 @@ jobs: https://dl.min.io/server/minio/release/windows-amd64/archive/minio.RELEASE.2022-05-26T05-48-41Z chmod +x /usr/local/bin/minio.exe - name: Set up Python - uses: actions/setup-python@v5.1.0 + uses: actions/setup-python@v5.2.0 + id: python-install with: python-version: 3.9 - name: Install Google Cloud Storage Testbench - shell: bash + shell: msys2 {0} + env: + PIPX_BIN_DIR: /usr/local/bin + PIPX_BASE_PYTHON: ${{ steps.python-install.outputs.python-path }} run: | ci/scripts/install_gcs_testbench.sh default - echo "PYTHON_BIN_DIR=$(cygpath --windows $(dirname $(which python3.exe)))" >> $GITHUB_ENV - name: Test shell: msys2 {0} run: | - PATH="$(cygpath --unix ${PYTHON_BIN_DIR}):${PATH}" ci/scripts/cpp_test.sh "$(pwd)" "$(pwd)/build" diff --git a/.github/workflows/csharp.yml b/.github/workflows/csharp.yml index e4db9f482e206..c618350affbeb 100644 --- a/.github/workflows/csharp.yml +++ b/.github/workflows/csharp.yml @@ -49,7 +49,7 @@ jobs: dotnet: ['8.0.x'] steps: - name: Install C# - uses: actions/setup-dotnet@v4 + uses: actions/setup-dotnet@v4.0.1 with: dotnet-version: ${{ matrix.dotnet }} - name: Checkout Arrow @@ -77,7 +77,7 @@ jobs: dotnet: ['8.0.x'] steps: - name: Install C# - uses: actions/setup-dotnet@v4 + uses: actions/setup-dotnet@v4.0.1 with: dotnet-version: ${{ matrix.dotnet }} - name: Checkout Arrow @@ -104,11 +104,11 @@ jobs: dotnet: ['8.0.x'] steps: - name: Install C# - uses: actions/setup-dotnet@v4 + uses: actions/setup-dotnet@v4.0.1 with: dotnet-version: ${{ matrix.dotnet }} - name: Setup Python - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0 + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 with: python-version: 3.12 - name: Checkout Arrow diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml index 5aec3638a8967..d2436fe3c4525 100644 --- a/.github/workflows/dev.yml +++ b/.github/workflows/dev.yml @@ -31,7 +31,6 @@ permissions: env: ARCHERY_DEBUG: 1 - ARCHERY_USE_DOCKER_CLI: 1 jobs: @@ -46,7 +45,7 @@ jobs: with: fetch-depth: 0 - name: Setup Python - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0 + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 with: python-version: 3.12 - name: Install pre-commit @@ -67,9 +66,9 @@ jobs: env: ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }} ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }} + UBUNTU: 22.04 run: | - sudo sysctl -w kernel.core_pattern="core.%e.%p" - ulimit -c unlimited + source ci/scripts/util_enable_core_dumps.sh archery docker run -e GITHUB_ACTIONS=true ubuntu-lint - name: Docker Push if: >- @@ -105,7 +104,7 @@ jobs: with: fetch-depth: 0 - name: Install Python - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0 + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 with: python-version: '3.12' - name: Install Ruby @@ -113,7 +112,7 @@ jobs: with: ruby-version: ruby - name: Install .NET - uses: actions/setup-dotnet@4d6c8fcf3c8f7a60068d26b594648e99df24cee3 # v4.0.0 + uses: actions/setup-dotnet@6bd8b7f7774af54e05809fcc5431931b3eb1ddee # v4.0.1 with: dotnet-version: '8.0.x' - name: Install Dependencies diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 36a0dc014db8d..1219f7526f9f2 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -25,7 +25,6 @@ permissions: env: ARCHERY_DEBUG: 1 - ARCHERY_USE_DOCKER_CLI: 1 ARROW_ENABLE_TIMING_TESTS: OFF DOCKER_VOLUME_PREFIX: ".docker/" @@ 
-53,7 +52,7 @@ jobs: key: debian-docs-${{ hashFiles('cpp/**') }} restore-keys: debian-docs- - name: Setup Python - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0 + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 with: python-version: 3.12 - name: Setup Archery diff --git a/.github/workflows/docs_light.yml b/.github/workflows/docs_light.yml index 947e2ac21b83c..7d540b7cecdc9 100644 --- a/.github/workflows/docs_light.yml +++ b/.github/workflows/docs_light.yml @@ -20,6 +20,7 @@ name: Docs on: pull_request: paths: + - '.dockerignore' - 'docs/**' - '.github/workflows/docs_light.yml' - 'ci/docker/conda.dockerfile' @@ -34,7 +35,6 @@ permissions: env: ARCHERY_DEBUG: 1 - ARCHERY_USE_DOCKER_CLI: 1 ARROW_ENABLE_TIMING_TESTS: OFF DOCKER_VOLUME_PREFIX: ".docker/" @@ -59,7 +59,7 @@ jobs: key: conda-docs-${{ hashFiles('cpp/**') }} restore-keys: conda-docs- - name: Setup Python - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0 + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 with: python-version: 3.12 - name: Setup Archery diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml deleted file mode 100644 index c247a89128b34..0000000000000 --- a/.github/workflows/go.yml +++ /dev/null @@ -1,488 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -name: Go - -on: - push: - paths: - - '.github/workflows/go.yml' - - 'ci/docker/*_go.dockerfile' - - 'ci/scripts/go_*' - - 'docker-compose.yml' - - 'go/**' - pull_request: - paths: - - '.github/workflows/go.yml' - - 'ci/docker/*_go.dockerfile' - - 'ci/docker/**' - - 'ci/scripts/go_*' - - 'docker-compose.yml' - - 'go/**' - -concurrency: - group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }} - cancel-in-progress: true - -permissions: - contents: read - -env: - ARCHERY_DEBUG: 1 - ARCHERY_USE_DOCKER_CLI: 1 - -jobs: - - docker-targets: - name: Docker targets - runs-on: ubuntu-latest - if: ${{ !contains(github.event.pull_request.title, 'WIP') }} - outputs: - targets: ${{ steps.detect-targets.outputs.targets }} - steps: - - name: Detect targets - id: detect-targets - run: | - echo "targets<> "$GITHUB_OUTPUT" - echo "[" >> "$GITHUB_OUTPUT" - cat <> "$GITHUB_OUTPUT" - { - "arch-label": "AMD64", - "arch": "amd64", - "go": "1.21", - "runs-on": "ubuntu-latest" - }, - { - "arch-label": "AMD64", - "arch": "amd64", - "go": "1.22", - "runs-on": "ubuntu-latest" - } - JSON - if [ "$GITHUB_REPOSITORY_OWNER" = "apache" ]; then - echo "," >> "$GITHUB_OUTPUT" - cat <> "$GITHUB_OUTPUT" - { - "arch-label": "ARM64", - "arch": "arm64v8", - "archery-use-docker-cli": "0", - "go": "1.21", - "runs-on": ["self-hosted", "arm", "linux"] - }, - { - "arch-label": "ARM64", - "arch": "arm64v8", - "archery-use-docker-cli": "0", - "go": "1.22", - "runs-on": ["self-hosted", "arm", "linux"] - } - JSON - fi - echo "]" >> "$GITHUB_OUTPUT" - echo "JSON" >> "$GITHUB_OUTPUT" - - docker: - name: ${{ matrix.arch-label }} Debian 12 Go ${{ matrix.go }} - needs: docker-targets - runs-on: ${{ matrix.runs-on }} - if: ${{ !contains(github.event.pull_request.title, 'WIP') }} - timeout-minutes: 60 - strategy: - fail-fast: false - matrix: - include: ${{ fromJson(needs.docker-targets.outputs.targets) }} - env: - ARCH: ${{ matrix.arch }} - # By default, use Docker CLI because docker-compose v1 is obsolete, - # except where the Docker client version is too old. 
- ARCHERY_USE_DOCKER_CLI: ${{ matrix.archery-use-docker-cli || '1' }} - GO: ${{ matrix.go }} - steps: - - name: Checkout Arrow - uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # v4.0.0 - with: - fetch-depth: 0 - submodules: recursive - - name: Setup Python - run: | - sudo apt update - sudo apt install -y --no-install-recommends python3 python3-dev python3-pip - - name: Setup Archery - run: python3 -m pip install -e dev/archery[docker] - - name: Execute Docker Build - env: - ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }} - ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }} - run: archery docker run debian-go - - name: Docker Push - if: >- - success() && - github.event_name == 'push' && - github.repository == 'apache/arrow' && - github.ref_name == 'main' - env: - ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }} - ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }} - continue-on-error: true - run: archery docker push debian-go - - name: Install Go ${{ matrix.go }} for Benchmarks - if: >- - success() && - matrix.arch == 'amd64' && - github.event_name == 'push' && - github.repository == 'apache/arrow' && - github.ref_name == 'main' - uses: actions/setup-go@0c52d547c9bc32b1aa3301fd7a9cb496313a4491 # v5.0.0 - with: - go-version: ${{ matrix.go }} - cache: true - cache-dependency-path: go/go.sum - - name: Run Benchmarks - if: >- - success() && - matrix.arch == 'amd64' && - github.event_name == 'push' && - github.repository == 'apache/arrow' && - github.ref_name == 'main' - env: - CONBENCH_URL: https://conbench.ursa.dev - CONBENCH_EMAIL: ${{ secrets.CONBENCH_EMAIL }} - CONBENCH_PASSWORD: ${{ secrets.CONBENCH_PASS }} - CONBENCH_REF: ${{ github.ref_name }} - CONBENCH_MACHINE_INFO_NAME: ${{ matrix.arch }}-debian-12 - run: | - python3 -m pip install benchadapt@git+https://github.com/conbench/conbench.git@main#subdirectory=benchadapt/python - python3 ci/scripts/go_bench_adapt.py - - build_test_386: - name: Go Cross-build and test for 386 - runs-on: ubuntu-latest - if: ${{ !contains(github.event.pull_request.title, 'WIP') }} - timeout-minutes: 20 - steps: - - name: Checkout Arrow - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - name: Get required Go version - run: | - (. .env && echo "GO_VERSION=${GO}") >> $GITHUB_ENV - - name: Install Go - uses: actions/setup-go@v5 - with: - go-version: "${{ env.GO_VERSION }}" - cache: true - cache-dependency-path: go/go.sum - - name: Run build - run: GOARCH=386 go build ./... - working-directory: ./go - - name: Run test - # WIP refactor, only tests in the specified dirs have been fixed - run: GOARCH=386 go test ./parquet/file/... 
- working-directory: ./go - - docker_cgo: - name: AMD64 Debian 12 Go ${{ matrix.go }} - CGO - runs-on: ubuntu-latest - if: ${{ !contains(github.event.pull_request.title, 'WIP') }} - timeout-minutes: 20 - strategy: - fail-fast: false - matrix: - go: ['1.21', '1.22'] - env: - GO: ${{ matrix.go }} - steps: - - name: Checkout Arrow - uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # v4.0.0 - with: - fetch-depth: 0 - submodules: recursive - - name: Setup Python - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0 - with: - python-version: 3.8 - - name: Setup Archery - run: pip install -e dev/archery[docker] - - name: Execute Docker Build - env: - ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }} - ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }} - run: archery docker run debian-go-cgo - - name: Docker Push - if: >- - success() && - github.event_name == 'push' && - github.repository == 'apache/arrow' && - github.ref_name == 'main' - env: - ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }} - ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }} - continue-on-error: true - run: archery docker push debian-go-cgo - - - docker_cgo_python: - name: AMD64 Debian 12 Go ${{ matrix.go }} - CGO Python - runs-on: ubuntu-latest - if: ${{ !contains(github.event.pull_request.title, 'WIP') }} - timeout-minutes: 15 - strategy: - fail-fast: false - matrix: - go: ['1.21', '1.22'] - env: - GO: ${{ matrix.go }} - steps: - - name: Checkout Arrow - uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # v4.0.0 - with: - fetch-depth: 0 - - name: Setup Python - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0 - with: - python-version: 3.8 - - name: Setup Archery - run: pip install -e dev/archery[docker] - - name: Execute Docker Build - env: - ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }} - ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }} - run: archery docker run debian-go-cgo-python - - name: Docker Push - if: >- - success() && - github.event_name == 'push' && - github.repository == 'apache/arrow' && - github.ref_name == 'main' - env: - ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }} - ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }} - continue-on-error: true - run: archery docker push debian-go-cgo-python - - windows: - name: AMD64 Windows 2019 Go ${{ matrix.go }} - runs-on: windows-2019 - if: ${{ !contains(github.event.pull_request.title, 'WIP') }} - timeout-minutes: 25 - strategy: - fail-fast: false - matrix: - go: ['1.21', '1.22'] - steps: - - name: Checkout Arrow - uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # v4.0.0 - with: - fetch-depth: 0 - submodules: recursive - - name: Install go - uses: actions/setup-go@0c52d547c9bc32b1aa3301fd7a9cb496313a4491 # v5.0.0 - with: - go-version: ${{ matrix.go }} - cache: true - cache-dependency-path: go/go.sum - - name: Install staticcheck - shell: bash - run: | - . 
.env - go install honnef.co/go/tools/cmd/staticcheck@${STATICCHECK} - - name: Build - shell: bash - run: ci/scripts/go_build.sh $(pwd) - - name: Test - shell: bash - run: ci/scripts/go_test.sh $(pwd) - - macos: - name: AMD64 macOS 12 Go ${{ matrix.go }} - runs-on: macos-12 - if: ${{ !contains(github.event.pull_request.title, 'WIP') }} - timeout-minutes: 60 - strategy: - fail-fast: false - matrix: - go: ['1.21', '1.22'] - steps: - - name: Checkout Arrow - uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # v4.0.0 - with: - fetch-depth: 0 - submodules: recursive - - name: Install go - uses: actions/setup-go@0c52d547c9bc32b1aa3301fd7a9cb496313a4491 # v5.0.0 - with: - go-version: ${{ matrix.go }} - cache: true - cache-dependency-path: go/go.sum - - name: Install staticcheck - run: | - . .env - go install honnef.co/go/tools/cmd/staticcheck@${STATICCHECK} - - name: Build - shell: bash - run: ci/scripts/go_build.sh $(pwd) - - name: Test - shell: bash - run: ci/scripts/go_test.sh $(pwd) - - name: Setup Python - if: >- - success() && - github.event_name == 'push' && - github.repository == 'apache/arrow' && - github.ref_name == 'main' - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0 - with: - python-version: '3.10' - - name: Run Benchmarks - if: >- - success() && - github.event_name == 'push' && - github.repository == 'apache/arrow' && - github.ref_name == 'main' - shell: bash - env: - CONBENCH_URL: 'https://conbench.ursa.dev' - CONBENCH_EMAIL: ${{ secrets.CONBENCH_EMAIL }} - CONBENCH_PASSWORD: ${{ secrets.CONBENCH_PASS }} - CONBENCH_REF: ${{ github.ref_name }} - CONBENCH_MACHINE_INFO_NAME: amd64-macos-11 - run: | - pip install benchadapt@git+https://github.com/conbench/conbench.git@main#subdirectory=benchadapt/python - python ci/scripts/go_bench_adapt.py - - - macos-cgo: - name: AMD64 macOS 12 Go ${{ matrix.go }} - CGO - runs-on: macos-12 - if: ${{ !contains(github.event.pull_request.title, 'WIP') }} - timeout-minutes: 60 - strategy: - fail-fast: false - matrix: - go: ['1.21', '1.22'] - env: - ARROW_GO_TESTCGO: "1" - steps: - - name: Checkout Arrow - uses: actions/checkout@v4 - with: - fetch-depth: 0 - submodules: recursive - - name: Install go - uses: actions/setup-go@v5 - with: - go-version: ${{ matrix.go }} - cache: true - cache-dependency-path: go/go.sum - - name: Brew Install Arrow and pkg-config - shell: bash - run: brew install apache-arrow pkg-config - - name: Install staticcheck - run: | - . 
.env - go install honnef.co/go/tools/cmd/staticcheck@${STATICCHECK} - - name: Add To pkg config path - shell: bash - run: | - echo "PKG_CONFIG_PATH=$(brew --prefix openssl@3)/lib/pkgconfig:$PKG_CONFIG_PATH" >> $GITHUB_ENV - - name: Build - shell: bash - run: ci/scripts/go_build.sh $(pwd) - - name: Test - shell: bash - run: ci/scripts/go_test.sh $(pwd) - - windows-mingw: - name: AMD64 Windows MinGW ${{ matrix.mingw-n-bits }} CGO - runs-on: windows-2019 - if: ${{ !contains(github.event.pull_request.title, 'WIP') }} - timeout-minutes: 60 - strategy: - fail-fast: false - matrix: - mingw-n-bits: - #- 32 runtime handling for CGO needs 64-bit currently - - 64 - env: - ARROW_GO_TESTCGO: "1" - MINGW_LINT: "1" - steps: - - name: Disable Crash Dialogs - run: | - reg add ` - "HKCU\SOFTWARE\Microsoft\Windows\Windows Error Reporting" ` - /v DontShowUI ` - /t REG_DWORD ` - /d 1 ` - /f - - name: Checkout Arrow - uses: actions/checkout@v4 - with: - fetch-depth: 0 - submodules: recursive - - uses: msys2/setup-msys2@v2 - with: - msystem: MINGW${{ matrix.mingw-n-bits }} - update: true - - name: Setup MSYS2 - shell: msys2 {0} - run: | - ci/scripts/msys2_setup.sh cgo - - name: Get required Go version - run: | - (. .env && echo "GO_VERSION=${GO}") >> $GITHUB_ENV - - name: Update CGO Env vars - shell: msys2 {0} - run: | - echo "CGO_CPPFLAGS=-I$(cygpath --windows ${MINGW_PREFIX}/include)" >> $GITHUB_ENV - echo "CGO_LDFLAGS=-g -O2 -L$(cygpath --windows ${MINGW_PREFIX}/lib) -L$(cygpath --windows ${MINGW_PREFIX}/bin)" >> $GITHUB_ENV - echo "MINGW_PREFIX=$(cygpath --windows ${MINGW_PREFIX})" >> $GITHUB_ENV - - name: Install go - uses: actions/setup-go@v5 - with: - go-version: "${{ env.GO_VERSION }}" - cache: true - cache-dependency-path: go/go.sum - - name: Install staticcheck - shell: bash - run: | - . 
.env - go install honnef.co/go/tools/cmd/staticcheck@${STATICCHECK} - - name: Build - shell: bash - run: ci/scripts/go_build.sh $(pwd) - - name: Test - shell: bash - run: ci/scripts/go_test.sh $(pwd) - - tinygo: - name: TinyGo - runs-on: ubuntu-latest - if: ${{ !contains(github.event.pull_request.title, 'WIP') }} - env: - TINYGO_VERSION: 0.27.0 - timeout-minutes: 60 - steps: - - name: Checkout Arrow - uses: actions/checkout@v4 - with: - fetch-depth: 0 - submodules: recursive - - name: Build and Run Example - run: | - docker run --rm -v $(pwd)/go:/src -v $(pwd)/ci/scripts:/ci-scripts "tinygo/tinygo:$TINYGO_VERSION" /ci-scripts/go_tinygo_example.sh diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index f53f4aeb505d2..af9a98ed437f8 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -20,6 +20,7 @@ name: Integration on: push: paths: + - '.dockerignore' - '.github/workflows/integration.yml' - 'ci/**' - 'dev/archery/**' @@ -33,6 +34,7 @@ on: - 'format/**' pull_request: paths: + - '.dockerignore' - '.github/workflows/integration.yml' - 'ci/**' - 'dev/archery/**' @@ -54,7 +56,6 @@ permissions: env: ARCHERY_DEBUG: 1 - ARCHERY_USE_DOCKER_CLI: 1 DOCKER_VOLUME_PREFIX: ".docker/" jobs: @@ -80,6 +81,11 @@ jobs: with: repository: apache/arrow-nanoarrow path: nanoarrow + - name: Checkout Arrow Go + uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + with: + repository: apache/arrow-go + path: go - name: Free up disk space run: | ci/scripts/util_free_space.sh @@ -90,18 +96,20 @@ jobs: key: conda-${{ hashFiles('cpp/**') }} restore-keys: conda- - name: Setup Python - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0 + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 with: - python-version: 3.8 + python-version: 3.12 - name: Setup Archery run: pip install -e dev/archery[docker] - name: Execute Docker Build env: ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }} ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }} - run: > + run: | + source ci/scripts/util_enable_core_dumps.sh archery docker run \ -e ARCHERY_DEFAULT_BRANCH=${{ github.event.repository.default_branch }} \ + -e ARCHERY_INTEGRATION_WITH_GO=1 \ -e ARCHERY_INTEGRATION_WITH_NANOARROW=1 \ -e ARCHERY_INTEGRATION_WITH_RUST=1 \ conda-integration diff --git a/.github/workflows/java.yml b/.github/workflows/java.yml index 08dbe7c8068c0..ad39dbc7d01e6 100644 --- a/.github/workflows/java.yml +++ b/.github/workflows/java.yml @@ -20,6 +20,7 @@ name: Java on: push: paths: + - '.dockerignore' - '.github/workflows/java.yml' - 'ci/docker/*java*' - 'ci/scripts/java*.sh' @@ -29,6 +30,7 @@ on: - 'java/**' pull_request: paths: + - '.dockerignore' - '.github/workflows/java.yml' - 'ci/docker/*java*' - 'ci/scripts/java*.sh' @@ -46,7 +48,6 @@ permissions: env: ARCHERY_DEBUG: 1 - ARCHERY_USE_DOCKER_CLI: 1 DOCKER_VOLUME_PREFIX: ".docker/" jobs: @@ -58,7 +59,7 @@ jobs: strategy: fail-fast: false matrix: - jdk: [8, 11, 17, 21, 22] + jdk: [11, 17, 21, 22] maven: [3.9.6] image: [java] env: @@ -77,9 +78,9 @@ jobs: key: maven-${{ hashFiles('java/**') }} restore-keys: maven- - name: Setup Python - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0 + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 with: - python-version: 3.8 + python-version: 3.12 - name: Setup Archery run: pip install -e dev/archery[docker] - name: Execute Docker Build diff --git 
a/.github/workflows/java_jni.yml b/.github/workflows/java_jni.yml index ea5f8d694a9c6..56aa1d0992887 100644 --- a/.github/workflows/java_jni.yml +++ b/.github/workflows/java_jni.yml @@ -20,6 +20,7 @@ name: Java JNI on: push: paths: + - '.dockerignore' - '.github/workflows/java_jni.yml' - 'ci/docker/**' - 'ci/scripts/cpp_build.sh' @@ -29,6 +30,7 @@ on: - 'java/**' pull_request: paths: + - '.dockerignore' - '.github/workflows/java_jni.yml' - 'ci/docker/**' - 'ci/scripts/cpp_build.sh' @@ -46,7 +48,6 @@ permissions: env: ARCHERY_DEBUG: 1 - ARCHERY_USE_DOCKER_CLI: 1 DOCKER_VOLUME_PREFIX: ".docker/" jobs: @@ -71,16 +72,18 @@ jobs: key: java-jni-manylinux-2014-${{ hashFiles('cpp/**', 'java/**') }} restore-keys: java-jni-manylinux-2014- - name: Setup Python - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0 + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 with: - python-version: 3.8 + python-version: 3.12 - name: Setup Archery run: pip install -e dev/archery[docker] - name: Execute Docker Build env: ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }} ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }} - run: archery docker run java-jni-manylinux-2014 + run: | + source ci/scripts/util_enable_core_dumps.sh + archery docker run java-jni-manylinux-2014 - name: Docker Push if: >- success() && @@ -111,9 +114,9 @@ jobs: key: maven-${{ hashFiles('java/**') }} restore-keys: maven- - name: Setup Python - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0 + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 with: - python-version: 3.8 + python-version: 3.12 - name: Setup Archery run: pip install -e dev/archery[docker] - name: Execute Docker Build diff --git a/.github/workflows/java_nightly.yml b/.github/workflows/java_nightly.yml index f40d4ce5b42d6..0bf0c27288faf 100644 --- a/.github/workflows/java_nightly.yml +++ b/.github/workflows/java_nightly.yml @@ -58,7 +58,7 @@ jobs: repository: ursacomputing/crossbow ref: main - name: Set up Python - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0 + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 with: cache: 'pip' python-version: 3.12 diff --git a/.github/workflows/js.yml b/.github/workflows/js.yml index c11c8254011f6..c7693c05133b0 100644 --- a/.github/workflows/js.yml +++ b/.github/workflows/js.yml @@ -20,12 +20,14 @@ name: NodeJS on: push: paths: + - '.dockerignore' - '.github/workflows/js.yml' - 'ci/docker/*js.dockerfile' - 'ci/scripts/js_*' - 'js/**' pull_request: paths: + - '.dockerignore' - '.github/workflows/js.yml' - 'ci/docker/*js.dockerfile' - 'ci/scripts/js_*' @@ -40,7 +42,6 @@ permissions: env: ARCHERY_DEBUG: 1 - ARCHERY_USE_DOCKER_CLI: 1 jobs: @@ -55,9 +56,9 @@ jobs: with: fetch-depth: 0 - name: Setup Python - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0 + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 with: - python-version: 3.8 + python-version: 3.12 - name: Setup Archery run: pip install -e dev/archery[docker] - name: Execute Docker Build @@ -65,8 +66,7 @@ jobs: ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }} ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }} run: | - sudo sysctl -w kernel.core_pattern="core.%e.%p" - ulimit -c unlimited + source ci/scripts/util_enable_core_dumps.sh archery docker run debian-js - name: Docker Push if: >- diff --git a/.github/workflows/pr_bot.yml b/.github/workflows/pr_bot.yml 
index e589610f536b3..bbb1a2d7228d0 100644 --- a/.github/workflows/pr_bot.yml +++ b/.github/workflows/pr_bot.yml @@ -82,7 +82,7 @@ jobs: # fetch the tags for version number generation fetch-depth: 0 - name: Set up Python - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0 + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 with: python-version: 3.12 - name: Install Archery and Crossbow dependencies diff --git a/.github/workflows/pr_review_trigger.yml b/.github/workflows/pr_review_trigger.yml index 0cd89b3206715..68f922ce8b4d9 100644 --- a/.github/workflows/pr_review_trigger.yml +++ b/.github/workflows/pr_review_trigger.yml @@ -29,7 +29,7 @@ jobs: runs-on: ubuntu-latest steps: - name: "Upload PR review Payload" - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4.4.0 with: path: "${{ github.event_path }}" name: "pr_review_payload" diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index a568f8346e7fc..4916287556b0c 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -20,6 +20,7 @@ name: Python on: push: paths: + - '.dockerignore' - '.github/workflows/python.yml' - 'ci/**' - 'cpp/**' @@ -27,6 +28,7 @@ on: - 'python/**' pull_request: paths: + - '.dockerignore' - '.github/workflows/python.yml' - 'ci/**' - 'cpp/**' @@ -42,7 +44,6 @@ permissions: env: ARCHERY_DEBUG: 1 - ARCHERY_USE_DOCKER_CLI: 1 DOCKER_VOLUME_PREFIX: ".docker/" jobs: @@ -57,35 +58,41 @@ jobs: matrix: name: - conda-python-docs - - conda-python-3.9-nopandas - - conda-python-3.8-pandas-1.0 - - conda-python-3.10-pandas-latest + - conda-python-3.10-nopandas + - conda-python-3.9-pandas-1.1.3 + - conda-python-3.11-pandas-latest + - conda-python-3.11-no-numpy include: - name: conda-python-docs - cache: conda-python-3.9 + cache: conda-python-3.10 image: conda-python-docs - title: AMD64 Conda Python 3.9 Sphinx & Numpydoc - python: 3.9 - - name: conda-python-3.9-nopandas - cache: conda-python-3.9 + title: AMD64 Conda Python 3.10 Sphinx & Numpydoc + python: "3.10" + - name: conda-python-3.10-nopandas + cache: conda-python-3.10 image: conda-python - title: AMD64 Conda Python 3.9 Without Pandas - python: 3.9 - - name: conda-python-3.8-pandas-1.0 - cache: conda-python-3.8 + title: AMD64 Conda Python 3.10 Without Pandas + python: "3.10" + - name: conda-python-3.9-pandas-1.1.3 + cache: conda-python-3.9 image: conda-python-pandas - title: AMD64 Conda Python 3.8 Pandas 1.0 - python: 3.8 - pandas: "1.0" - numpy: 1.16 - - name: conda-python-3.10-pandas-latest - cache: conda-python-3.10 + title: AMD64 Conda Python 3.9 Pandas 1.1.3 + python: 3.9 + pandas: "1.1.3" + numpy: 1.19.5 + - name: conda-python-3.11-pandas-latest + cache: conda-python-3.11 image: conda-python-pandas - title: AMD64 Conda Python 3.10 Pandas latest - python: "3.10" + title: AMD64 Conda Python 3.11 Pandas latest + python: "3.11" pandas: latest + - name: conda-python-3.11-no-numpy + cache: conda-python-3.11 + image: conda-python-no-numpy + title: AMD64 Conda Python 3.11 without NumPy + python: "3.11" env: - PYTHON: ${{ matrix.python || 3.8 }} + PYTHON: ${{ matrix.python || 3.9 }} UBUNTU: ${{ matrix.ubuntu || 20.04 }} PANDAS: ${{ matrix.pandas || 'latest' }} NUMPY: ${{ matrix.numpy || 'latest' }} @@ -102,9 +109,9 @@ jobs: key: ${{ matrix.cache }}-${{ hashFiles('cpp/**') }} restore-keys: ${{ matrix.cache }}- - name: Setup Python - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0 + uses: 
actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 with: - python-version: 3.8 + python-version: 3.12 - name: Setup Archery run: pip install -e dev/archery[docker] - name: Execute Docker Build @@ -112,8 +119,7 @@ jobs: ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }} ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }} run: | - sudo sysctl -w kernel.core_pattern="core.%e.%p" - ulimit -c unlimited + source ci/scripts/util_enable_core_dumps.sh archery docker run ${{ matrix.image }} - name: Docker Push if: >- @@ -164,7 +170,7 @@ jobs: ARROW_BUILD_TESTS: OFF PYARROW_TEST_LARGE_MEMORY: ON # Current oldest supported version according to https://endoflife.date/macos - MACOSX_DEPLOYMENT_TARGET: 10.15 + MACOSX_DEPLOYMENT_TARGET: 12.0 steps: - name: Checkout Arrow uses: actions/checkout@v4 @@ -172,7 +178,7 @@ jobs: fetch-depth: 0 submodules: recursive - name: Setup Python - uses: actions/setup-python@v5.1.0 + uses: actions/setup-python@v5.2.0 with: python-version: '3.11' - name: Install Dependencies @@ -182,6 +188,10 @@ jobs: python -m pip install \ -r python/requirements-build.txt \ -r python/requirements-test.txt + - name: Install MinIO + run: | + $(brew --prefix bash)/bin/bash \ + ci/scripts/install_minio.sh latest /usr/local - name: Setup ccache shell: bash run: ci/scripts/ccache_setup.sh diff --git a/.github/workflows/r.yml b/.github/workflows/r.yml index e8f57db99c28c..9abedcd767150 100644 --- a/.github/workflows/r.yml +++ b/.github/workflows/r.yml @@ -20,6 +20,7 @@ name: R on: push: paths: + - '.dockerignore' - ".github/workflows/r.yml" - "ci/docker/**" - "ci/etc/rprofile" @@ -32,6 +33,7 @@ on: - "r/**" pull_request: paths: + - '.dockerignore' - ".github/workflows/r.yml" - "ci/docker/**" - "ci/etc/rprofile" @@ -52,7 +54,6 @@ permissions: env: ARCHERY_DEBUG: 1 - ARCHERY_USE_DOCKER_CLI: 1 DOCKER_VOLUME_PREFIX: ".docker/" jobs: @@ -122,7 +123,7 @@ jobs: fail-fast: false matrix: r: ["4.4"] - ubuntu: [20.04] + ubuntu: [24.04] force-tests: ["true"] env: R: ${{ matrix.r }} @@ -133,6 +134,9 @@ jobs: with: fetch-depth: 0 submodules: recursive + - name: Free up disk space + run: | + ci/scripts/util_free_space.sh - name: Cache Docker Volumes uses: actions/cache@13aacd865c20de90d75de3b17ebe84f7a17d57d2 # v4.0.0 with: @@ -144,9 +148,9 @@ jobs: ubuntu-${{ matrix.ubuntu }}-r-${{ matrix.r }}-${{ hashFiles('cpp/src/**/*.cc','cpp/src/**/*.h)') }}- ubuntu-${{ matrix.ubuntu }}-r-${{ matrix.r }}- - name: Setup Python - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0 + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 with: - python-version: 3.8 + python-version: 3.12 - name: Setup Archery run: pip install -e dev/archery[docker] - name: Execute Docker Build @@ -154,8 +158,7 @@ jobs: ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }} ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }} run: | - sudo sysctl -w kernel.core_pattern="core.%e.%p" - ulimit -c unlimited + source ci/scripts/util_enable_core_dumps.sh # Setting a non-default and non-probable Marquesas French Polynesia time # it has both with a .45 offset and very very few people who live there. 
archery docker run -e TZ=MART -e ARROW_R_FORCE_TESTS=${{ matrix.force-tests }} ubuntu-r @@ -167,9 +170,9 @@ jobs: if: always() - name: Save the test output if: always() - uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce # v3.1.2 + uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 with: - name: test-output + name: test-output-${{ matrix.ubuntu }}-${{ matrix.r }} path: r/check/arrow.Rcheck/tests/testthat.Rout* - name: Docker Push if: >- @@ -204,9 +207,9 @@ jobs: fetch-depth: 0 submodules: recursive - name: Setup Python - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0 + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 with: - python-version: 3.8 + python-version: 3.12 - name: Setup Archery run: pip install -e dev/archery[docker] - name: Execute Docker Build @@ -214,8 +217,7 @@ jobs: ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }} ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }} run: | - sudo sysctl -w kernel.core_pattern="core.%e.%p" - ulimit -c unlimited + source ci/scripts/util_enable_core_dumps.sh # Don't set a TZ here to test that case. These builds will have the following warning in them: # System has not been booted with systemd as init system (PID 1). Can't operate. # Failed to connect to bus: Host is down @@ -228,9 +230,9 @@ jobs: if: always() - name: Save the test output if: always() - uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce # v3.1.2 + uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # v4.4.0 with: - name: test-output + name: test-output-bundled path: r/check/arrow.Rcheck/tests/testthat.Rout* - name: Docker Push if: >- @@ -290,7 +292,7 @@ jobs: # So that they're unique when multiple are downloaded in the next step shell: bash run: mv libarrow.zip libarrow-rtools${{ matrix.config.rtools }}-${{ matrix.config.arch }}.zip - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874 # # v4.0.0 with: name: libarrow-rtools${{ matrix.config.rtools }}-${{ matrix.config.arch }}.zip path: libarrow-rtools${{ matrix.config.rtools }}-${{ matrix.config.arch }}.zip @@ -328,7 +330,7 @@ jobs: echo "$HOME/.local/bin" >> $GITHUB_PATH - run: mkdir r/windows - name: Download artifacts - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4.1.8 with: name: libarrow-rtools40-ucrt64.zip path: r/windows diff --git a/.github/workflows/r_nightly.yml b/.github/workflows/r_nightly.yml index af5382f90834c..9817e41d3b61d 100644 --- a/.github/workflows/r_nightly.yml +++ b/.github/workflows/r_nightly.yml @@ -60,7 +60,7 @@ jobs: repository: ursacomputing/crossbow ref: main - name: Set up Python - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0 + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 with: cache: 'pip' python-version: 3.12 diff --git a/.github/workflows/ruby.yml b/.github/workflows/ruby.yml index 6a29ec8e72cab..83a066dc27386 100644 --- a/.github/workflows/ruby.yml +++ b/.github/workflows/ruby.yml @@ -20,6 +20,7 @@ name: C GLib & Ruby on: push: paths: + - '.dockerignore' - '.github/workflows/ruby.yml' - 'ci/docker/**' - 'ci/scripts/c_glib_*' @@ -33,6 +34,7 @@ on: - 'ruby/**' pull_request: paths: + - '.dockerignore' - '.github/workflows/ruby.yml' - 'ci/docker/**' - 'ci/scripts/c_glib_*' @@ -54,7 +56,6 @@ permissions: env: ARCHERY_DEBUG: 1 - ARCHERY_USE_DOCKER_CLI: 1 DOCKER_VOLUME_PREFIX: ".docker/" jobs: @@ -84,9 
+85,9 @@ jobs: key: ubuntu-${{ matrix.ubuntu }}-ruby-${{ hashFiles('cpp/**') }} restore-keys: ubuntu-${{ matrix.ubuntu }}-ruby- - name: Setup Python - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0 + uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 with: - python-version: 3.8 + python-version: 3.12 - name: Setup Archery run: pip install -e dev/archery[docker] - name: Execute Docker Build @@ -94,8 +95,7 @@ jobs: ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }} ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }} run: | - sudo sysctl -w kernel.core_pattern="core.%e.%p" - ulimit -c unlimited + source ci/scripts/util_enable_core_dumps.sh archery docker run \ -e ARROW_FLIGHT=ON \ -e ARROW_FLIGHT_SQL=ON \ @@ -407,7 +407,10 @@ jobs: -source "https://nuget.pkg.github.com/$GITHUB_REPOSITORY_OWNER/index.json" - name: Build C++ vcpkg dependencies run: | - vcpkg\vcpkg.exe install --triplet $env:VCPKG_TRIPLET --x-manifest-root cpp --x-install-root build\cpp\vcpkg_installed + vcpkg\vcpkg.exe install ` + --triplet $env:VCPKG_TRIPLET ` + --x-manifest-root cpp ` + --x-install-root build\cpp\vcpkg_installed - name: Build C++ shell: cmd run: | diff --git a/.github/workflows/swift.yml b/.github/workflows/swift.yml index 3f039315b505a..87aa5cb83f714 100644 --- a/.github/workflows/swift.yml +++ b/.github/workflows/swift.yml @@ -20,6 +20,7 @@ name: Swift on: push: paths: + - '.dockerignore' - '.github/workflows/swift.yml' - 'ci/docker/*swift*' - 'ci/scripts/swift_*' @@ -27,6 +28,7 @@ on: - 'swift/**' pull_request: paths: + - '.dockerignore' - '.github/workflows/swift.yml' - 'ci/docker/*swift*' - 'ci/scripts/swift_*' @@ -42,7 +44,6 @@ permissions: env: ARCHERY_DEBUG: 1 - ARCHERY_USE_DOCKER_CLI: 1 DOCKER_VOLUME_PREFIX: ".docker/" jobs: @@ -64,8 +65,7 @@ jobs: ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }} ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }} run: | - sudo sysctl -w kernel.core_pattern="core.%e.%p" - ulimit -c unlimited + source ci/scripts/util_enable_core_dumps.sh archery docker run ubuntu-swift - name: Docker Push if: >- diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index bf0bcde14622a..bee20369c017e 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -78,6 +78,26 @@ repos: ?^cpp/src/generated/| ?^cpp/thirdparty/| ) + - repo: https://github.com/cpplint/cpplint + rev: 1.6.1 + hooks: + - id: cpplint + name: C++ Lint + args: + - "--verbose=2" + types_or: + - c++ + files: >- + ^cpp/ + exclude: >- + ( + ?\.grpc\.fb\.(cc|h)$| + ?\.pb\.(cc|h)$| + ?_generated.*\.(cc|h)$| + ?^cpp/src/arrow/vendored/| + ?^cpp/src/generated/| + ?^cpp/thirdparty/| + ) - repo: https://github.com/pre-commit/mirrors-clang-format rev: v14.0.6 hooks: @@ -148,17 +168,3 @@ repos: '--disable', 'dangling-hyphen,line-too-long', ] - - repo: https://github.com/golangci/golangci-lint - rev: v1.59.0 - hooks: - # no built-in support for multiple go.mod - # https://github.com/golangci/golangci-lint/issues/828 - - id: golangci-lint-full - name: golangci-lint-full-arrow - entry: bash -c 'cd go/arrow && golangci-lint run' - - id: golangci-lint-full - name: golangci-lint-full-parquet - entry: bash -c 'cd go/parquet && golangci-lint run' - - id: golangci-lint-full - name: golangci-lint-full-internal - entry: bash -c 'cd go/internal && golangci-lint run' diff --git a/CPPLINT.cfg b/CPPLINT.cfg new file mode 100644 index 0000000000000..2f47b4dbf57b7 --- /dev/null +++ b/CPPLINT.cfg @@ -0,0 +1,30 @@ +# Licensed to the Apache Software Foundation 
(ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +filter = -build/c++11 +filter = -build/header_guard +filter = -build/include_order +filter = -build/include_what_you_use +filter = -readability/alt_tokens +# readability/casting is disabled as it aggressively warns about +# functions with names like "int32", so "int32(x)", where int32 is a +# function name, warns with +filter = -readability/casting +filter = -readability/todo +filter = -runtime/references +filter = -whitespace/comments +linelength = 90 diff --git a/appveyor.yml b/appveyor.yml index 5954251d34733..9e4582f1d8d7f 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -24,6 +24,7 @@ only_commits: - appveyor.yml - ci/appveyor* - ci/conda* + - ci/scripts/*.bat - cpp/ - format/ - python/ diff --git a/c_glib/Gemfile b/c_glib/Gemfile index d32bc87ba72c6..cc6adecabe230 100644 --- a/c_glib/Gemfile +++ b/c_glib/Gemfile @@ -20,4 +20,4 @@ source "https://rubygems.org/" gem "test-unit" -gem "gobject-introspection", ">= 4.1.1" +gem "gobject-introspection", ">= 4.2.3" diff --git a/c_glib/arrow-cuda-glib/meson.build b/c_glib/arrow-cuda-glib/meson.build index 47bed70f03b60..36730dec6c4b7 100644 --- a/c_glib/arrow-cuda-glib/meson.build +++ b/c_glib/arrow-cuda-glib/meson.build @@ -58,14 +58,15 @@ libarrow_cuda_glib = library('arrow-cuda-glib', arrow_cuda_glib = declare_dependency(link_with: libarrow_cuda_glib, include_directories: base_include_directories, dependencies: dependencies) - -pkgconfig.generate(libarrow_cuda_glib, - description: 'C API for Apache Arrow CUDA based on GLib', - filebase: 'arrow-cuda-glib', - name: 'Apache Arrow CUDA GLib', - requires: ['arrow-glib', 'arrow-cuda'], - variables: pkgconfig_variables, - version: version) +if target_machine.system() != 'windows' + pkgconfig.generate(libarrow_cuda_glib, + description: 'C API for Apache Arrow CUDA based on GLib', + filebase: 'arrow-cuda-glib', + name: 'Apache Arrow CUDA GLib', + requires: ['arrow-glib', 'arrow-cuda'], + variables: pkgconfig_variables, + version: version) +endif if have_gi gir_dependencies = [ diff --git a/c_glib/arrow-flight-glib/client.cpp b/c_glib/arrow-flight-glib/client.cpp index 80c47e336f872..8ec8e9729a2d9 100644 --- a/c_glib/arrow-flight-glib/client.cpp +++ b/c_glib/arrow-flight-glib/client.cpp @@ -33,10 +33,19 @@ G_BEGIN_DECLS * #GAFlightStreamReader is a class for reading record batches from a * server. * + * #GAFlightStreamWriter is a class for writing record batches to a + * server. + * + * #GAFlightMetadataReader is a class for reading metadata from a + * server. + * * #GAFlightCallOptions is a class for options of each call. * * #GAFlightClientOptions is a class for options of each client. * + * #GAFlightDoPutResult is a class that has gaflight_client_do_put() + * result. + * * #GAFlightClient is a class for Apache Arrow Flight client. 
* * Since: 5.0.0 @@ -56,16 +65,142 @@ gaflight_stream_reader_class_init(GAFlightStreamReaderClass *klass) { } -typedef struct GAFlightCallOptionsPrivate_ +G_DEFINE_TYPE(GAFlightStreamWriter, + gaflight_stream_writer, + GAFLIGHT_TYPE_RECORD_BATCH_WRITER) + +static void +gaflight_stream_writer_init(GAFlightStreamWriter *object) +{ +} + +static void +gaflight_stream_writer_class_init(GAFlightStreamWriterClass *klass) +{ +} + +/** + * gaflight_stream_writer_done_writing: + * @writer: A #GAFlightStreamWriter. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE on error. + * + * Since: 18.0.0 + */ +gboolean +gaflight_stream_writer_done_writing(GAFlightStreamWriter *writer, GError **error) +{ + auto flight_writer = std::static_pointer_cast( + garrow_record_batch_writer_get_raw(GARROW_RECORD_BATCH_WRITER(writer))); + return garrow::check(error, + flight_writer->DoneWriting(), + "[flight-stream-writer][done-writing]"); +} + +struct GAFlightMetadataReaderPrivate +{ + arrow::flight::FlightMetadataReader *reader; +}; + +enum { + PROP_METADATA_READER_READER = 1, +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GAFlightMetadataReader, + gaflight_metadata_reader, + G_TYPE_OBJECT) + +#define GAFLIGHT_METADATA_READER_GET_PRIVATE(object) \ + static_cast( \ + gaflight_metadata_reader_get_instance_private(GAFLIGHT_METADATA_READER(object))) + +static void +gaflight_metadata_reader_finalize(GObject *object) +{ + auto priv = GAFLIGHT_METADATA_READER_GET_PRIVATE(object); + delete priv->reader; + G_OBJECT_CLASS(gaflight_metadata_reader_parent_class)->finalize(object); +} + +static void +gaflight_metadata_reader_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GAFLIGHT_METADATA_READER_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_METADATA_READER_READER: + priv->reader = + static_cast(g_value_get_pointer(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +gaflight_metadata_reader_init(GAFlightMetadataReader *object) +{ +} + +static void +gaflight_metadata_reader_class_init(GAFlightMetadataReaderClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->finalize = gaflight_metadata_reader_finalize; + gobject_class->set_property = gaflight_metadata_reader_set_property; + + GParamSpec *spec; + spec = g_param_spec_pointer( + "reader", + nullptr, + nullptr, + static_cast(G_PARAM_WRITABLE | G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_METADATA_READER_READER, spec); +} + +/** + * gaflight_metadata_reader_read: + * @reader: A #GAFlightMetadataReader. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (transfer full): The metadata on success, %NULL on error. 
+ * + * Since: 18.0.0 + */ +GArrowBuffer * +gaflight_metadata_reader_read(GAFlightMetadataReader *reader, GError **error) +{ + auto flight_reader = gaflight_metadata_reader_get_raw(reader); + std::shared_ptr metadata; + if (garrow::check(error, + flight_reader->ReadMetadata(&metadata), + "[flight-metadata-reader][read]")) { + return garrow_buffer_new_raw(&metadata); + } else { + return nullptr; + } +} + +struct GAFlightCallOptionsPrivate { arrow::flight::FlightCallOptions options; -} GAFlightCallOptionsPrivate; +}; + +enum { + PROP_TIMEOUT = 1, +}; G_DEFINE_TYPE_WITH_PRIVATE(GAFlightCallOptions, gaflight_call_options, G_TYPE_OBJECT) -#define GAFLIGHT_CALL_OPTIONS_GET_PRIVATE(obj) \ +#define GAFLIGHT_CALL_OPTIONS_GET_PRIVATE(object) \ static_cast( \ - gaflight_call_options_get_instance_private(GAFLIGHT_CALL_OPTIONS(obj))) + gaflight_call_options_get_instance_private(GAFLIGHT_CALL_OPTIONS(object))) static void gaflight_call_options_finalize(GObject *object) @@ -77,6 +212,42 @@ gaflight_call_options_finalize(GObject *object) G_OBJECT_CLASS(gaflight_call_options_parent_class)->finalize(object); } +static void +gaflight_call_options_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GAFLIGHT_CALL_OPTIONS_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_TIMEOUT: + priv->options.timeout = arrow::flight::TimeoutDuration(g_value_get_double(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +gaflight_call_options_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto priv = GAFLIGHT_CALL_OPTIONS_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_TIMEOUT: + g_value_set_double(value, priv->options.timeout.count()); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + static void gaflight_call_options_init(GAFlightCallOptions *object) { @@ -90,6 +261,28 @@ gaflight_call_options_class_init(GAFlightCallOptionsClass *klass) auto gobject_class = G_OBJECT_CLASS(klass); gobject_class->finalize = gaflight_call_options_finalize; + gobject_class->set_property = gaflight_call_options_set_property; + gobject_class->get_property = gaflight_call_options_get_property; + + arrow::flight::FlightCallOptions options; + GParamSpec *spec; + /** + * GAFlightCallOptions:timeout: + * + * An optional timeout for this call. Negative durations mean an + * implementation-defined default behavior will be used + * instead. This is the default value. + * + * Since: 18.0.0 + */ + spec = g_param_spec_double("timeout", + nullptr, + nullptr, + -G_MAXDOUBLE, + G_MAXDOUBLE, + options.timeout.count(), + static_cast(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, PROP_TIMEOUT, spec); } /** @@ -143,8 +336,8 @@ gaflight_call_options_clear_headers(GAFlightCallOptions *options) /** * gaflight_call_options_foreach_header: * @options: A #GAFlightCallOptions. - * @func: (scope call): The user's callback function. - * @user_data: (closure): Data for @func. + * @func: (scope call) (closure user_data): The user's callback function. + * @user_data: Data for @func. * * Iterates over all headers in the options. 
* @@ -385,6 +578,139 @@ gaflight_client_options_new(void) g_object_new(GAFLIGHT_TYPE_CLIENT_OPTIONS, NULL)); } +struct GAFlightDoPutResultPrivate +{ + GAFlightStreamWriter *writer; + GAFlightMetadataReader *reader; +}; + +enum { + PROP_DO_PUT_RESULT_RESULT = 1, + PROP_DO_PUT_RESULT_WRITER, + PROP_DO_PUT_RESULT_READER, +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GAFlightDoPutResult, gaflight_do_put_result, G_TYPE_OBJECT) + +#define GAFLIGHT_DO_PUT_RESULT_GET_PRIVATE(object) \ + static_cast( \ + gaflight_do_put_result_get_instance_private(GAFLIGHT_DO_PUT_RESULT(object))) + +static void +gaflight_do_put_result_dispose(GObject *object) +{ + auto priv = GAFLIGHT_DO_PUT_RESULT_GET_PRIVATE(object); + + if (priv->writer) { + g_object_unref(priv->writer); + priv->writer = nullptr; + } + + if (priv->reader) { + g_object_unref(priv->reader); + priv->reader = nullptr; + } + + G_OBJECT_CLASS(gaflight_do_put_result_parent_class)->dispose(object); +} + +static void +gaflight_do_put_result_init(GAFlightDoPutResult *object) +{ +} + +static void +gaflight_do_put_result_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GAFLIGHT_DO_PUT_RESULT_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_DO_PUT_RESULT_RESULT: + { + auto result = static_cast( + g_value_get_pointer(value)); + std::shared_ptr writer = + std::move(result->writer); + priv->writer = gaflight_stream_writer_new_raw(&writer); + priv->reader = gaflight_metadata_reader_new_raw(result->reader.release()); + break; + } + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +gaflight_do_put_result_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto priv = GAFLIGHT_DO_PUT_RESULT_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_DO_PUT_RESULT_WRITER: + g_value_set_object(value, priv->writer); + break; + case PROP_DO_PUT_RESULT_READER: + g_value_set_object(value, priv->reader); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +gaflight_do_put_result_class_init(GAFlightDoPutResultClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->dispose = gaflight_do_put_result_dispose; + gobject_class->set_property = gaflight_do_put_result_set_property; + gobject_class->get_property = gaflight_do_put_result_get_property; + + GParamSpec *spec; + spec = g_param_spec_pointer( + "result", + nullptr, + nullptr, + static_cast(G_PARAM_WRITABLE | G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_DO_PUT_RESULT_RESULT, spec); + + /** + * GAFlightDoPutResult:writer: + * + * A writer to write record batches to. + * + * Since: 18.0.0 + */ + spec = g_param_spec_object("writer", + nullptr, + nullptr, + GAFLIGHT_TYPE_STREAM_WRITER, + static_cast(G_PARAM_READABLE)); + g_object_class_install_property(gobject_class, PROP_DO_PUT_RESULT_WRITER, spec); + + /** + * GAFlightDoPutResult:reader: + * + * A reader for application metadata from the server. 
+ * + * Since: 18.0.0 + */ + spec = g_param_spec_object("reader", + nullptr, + nullptr, + GAFLIGHT_TYPE_METADATA_READER, + static_cast(G_PARAM_READABLE)); + g_object_class_install_property(gobject_class, PROP_DO_PUT_RESULT_READER, spec); +} + struct GAFlightClientPrivate { std::shared_ptr client; @@ -661,6 +987,51 @@ gaflight_client_do_get(GAFlightClient *client, return gaflight_stream_reader_new_raw(flight_reader.release(), TRUE); } +/** + * gaflight_client_do_put: + * @client: A #GAFlightClient. + * @descriptor: A #GAFlightDescriptor. + * @schema: A #GArrowSchema. + * @options: (nullable): A #GAFlightCallOptions. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Upload data to a Flight described by the given descriptor. The + * caller must call garrow_record_batch_writer_close() on the + * returned stream once they are done writing. + * + * The reader and writer are linked; closing the writer will also + * close the reader. Use garrow_flight_stream_writer_done_writing() to + * only close the write side of the channel. + * + * Returns: (nullable) (transfer full): + * The #GAFlighDoPutResult holding a reader and a writer on success, + * %NULL on error. + * + * Since: 18.0.0 + */ +GAFlightDoPutResult * +gaflight_client_do_put(GAFlightClient *client, + GAFlightDescriptor *descriptor, + GArrowSchema *schema, + GAFlightCallOptions *options, + GError **error) +{ + auto flight_client = gaflight_client_get_raw(client); + auto flight_descriptor = gaflight_descriptor_get_raw(descriptor); + auto arrow_schema = garrow_schema_get_raw(schema); + arrow::flight::FlightCallOptions flight_default_options; + auto flight_options = &flight_default_options; + if (options) { + flight_options = gaflight_call_options_get_raw(options); + } + auto result = flight_client->DoPut(*flight_options, *flight_descriptor, arrow_schema); + if (!garrow::check(error, result, "[flight-client][do-put]")) { + return nullptr; + } + auto flight_result = std::move(*result); + return gaflight_do_put_result_new_raw(&flight_result); +} + G_END_DECLS GAFlightStreamReader * @@ -672,7 +1043,31 @@ gaflight_stream_reader_new_raw(arrow::flight::FlightStreamReader *flight_reader, flight_reader, "is-owner", is_owner, - NULL)); + nullptr)); +} + +GAFlightStreamWriter * +gaflight_stream_writer_new_raw( + std::shared_ptr *flight_writer) +{ + return GAFLIGHT_STREAM_WRITER(g_object_new(GAFLIGHT_TYPE_STREAM_WRITER, + "record-batch-writer", + flight_writer, + nullptr)); +} + +GAFlightMetadataReader * +gaflight_metadata_reader_new_raw(arrow::flight::FlightMetadataReader *flight_reader) +{ + return GAFLIGHT_METADATA_READER( + g_object_new(GAFLIGHT_TYPE_METADATA_READER, "reader", flight_reader, nullptr)); +} + +arrow::flight::FlightMetadataReader * +gaflight_metadata_reader_get_raw(GAFlightMetadataReader *reader) +{ + auto priv = GAFLIGHT_METADATA_READER_GET_PRIVATE(reader); + return priv->reader; } arrow::flight::FlightCallOptions * @@ -689,6 +1084,13 @@ gaflight_client_options_get_raw(GAFlightClientOptions *options) return &(priv->options); } +GAFlightDoPutResult * +gaflight_do_put_result_new_raw(arrow::flight::FlightClient::DoPutResult *flight_result) +{ + return GAFLIGHT_DO_PUT_RESULT( + g_object_new(GAFLIGHT_TYPE_DO_PUT_RESULT, "result", flight_result, nullptr)); +} + std::shared_ptr gaflight_client_get_raw(GAFlightClient *client) { diff --git a/c_glib/arrow-flight-glib/client.h b/c_glib/arrow-flight-glib/client.h index a91bbe55e3c04..12c5a06b810e1 100644 --- a/c_glib/arrow-flight-glib/client.h +++ 
b/c_glib/arrow-flight-glib/client.h @@ -35,6 +35,35 @@ struct _GAFlightStreamReaderClass GAFlightRecordBatchReaderClass parent_class; }; +#define GAFLIGHT_TYPE_STREAM_WRITER (gaflight_stream_writer_get_type()) +GAFLIGHT_AVAILABLE_IN_18_0 +G_DECLARE_DERIVABLE_TYPE(GAFlightStreamWriter, + gaflight_stream_writer, + GAFLIGHT, + STREAM_WRITER, + GAFlightRecordBatchWriter) +struct _GAFlightStreamWriterClass +{ + GAFlightRecordBatchWriterClass parent_class; +}; + +GAFLIGHT_AVAILABLE_IN_18_0 +gboolean +gaflight_stream_writer_done_writing(GAFlightStreamWriter *writer, GError **error); + +#define GAFLIGHT_TYPE_METADATA_READER (gaflight_metadata_reader_get_type()) +GAFLIGHT_AVAILABLE_IN_18_0 +G_DECLARE_DERIVABLE_TYPE( + GAFlightMetadataReader, gaflight_metadata_reader, GAFLIGHT, METADATA_READER, GObject) +struct _GAFlightMetadataReaderClass +{ + GObjectClass parent_class; +}; + +GAFLIGHT_AVAILABLE_IN_18_0 +GArrowBuffer * +gaflight_metadata_reader_read(GAFlightMetadataReader *reader, GError **error); + #define GAFLIGHT_TYPE_CALL_OPTIONS (gaflight_call_options_get_type()) GAFLIGHT_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE( @@ -75,6 +104,15 @@ GAFLIGHT_AVAILABLE_IN_5_0 GAFlightClientOptions * gaflight_client_options_new(void); +#define GAFLIGHT_TYPE_DO_PUT_RESULT (gaflight_do_put_result_get_type()) +GAFLIGHT_AVAILABLE_IN_18_0 +G_DECLARE_DERIVABLE_TYPE( + GAFlightDoPutResult, gaflight_do_put_result, GAFLIGHT, DO_PUT_RESULT, GObject) +struct _GAFlightDoPutResultClass +{ + GObjectClass parent_class; +}; + #define GAFLIGHT_TYPE_CLIENT (gaflight_client_get_type()) GAFLIGHT_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE(GAFlightClient, gaflight_client, GAFLIGHT, CLIENT, GObject) @@ -124,4 +162,12 @@ gaflight_client_do_get(GAFlightClient *client, GAFlightCallOptions *options, GError **error); +GAFLIGHT_AVAILABLE_IN_18_0 +GAFlightDoPutResult * +gaflight_client_do_put(GAFlightClient *client, + GAFlightDescriptor *descriptor, + GArrowSchema *schema, + GAFlightCallOptions *options, + GError **error); + G_END_DECLS diff --git a/c_glib/arrow-flight-glib/client.hpp b/c_glib/arrow-flight-glib/client.hpp index 185a28e6dc4bd..32ad35845aa12 100644 --- a/c_glib/arrow-flight-glib/client.hpp +++ b/c_glib/arrow-flight-glib/client.hpp @@ -28,6 +28,19 @@ GAFlightStreamReader * gaflight_stream_reader_new_raw(arrow::flight::FlightStreamReader *flight_reader, gboolean is_owner); +GAFLIGHT_EXTERN +GAFlightStreamWriter * +gaflight_stream_writer_new_raw( + std::shared_ptr *flight_writer); + +GAFLIGHT_EXTERN +GAFlightMetadataReader * +gaflight_metadata_reader_new_raw(arrow::flight::FlightMetadataReader *flight_reader); + +GAFLIGHT_EXTERN +arrow::flight::FlightMetadataReader * +gaflight_metadata_reader_get_raw(GAFlightMetadataReader *reader); + GAFLIGHT_EXTERN arrow::flight::FlightCallOptions * gaflight_call_options_get_raw(GAFlightCallOptions *options); @@ -36,6 +49,10 @@ GAFLIGHT_EXTERN arrow::flight::FlightClientOptions * gaflight_client_options_get_raw(GAFlightClientOptions *options); +GAFLIGHT_EXTERN +GAFlightDoPutResult * +gaflight_do_put_result_new_raw(arrow::flight::FlightClient::DoPutResult *flight_result); + GAFLIGHT_EXTERN std::shared_ptr gaflight_client_get_raw(GAFlightClient *client); diff --git a/c_glib/arrow-flight-glib/common.cpp b/c_glib/arrow-flight-glib/common.cpp index efc544f10cf66..3deaf67cc14e8 100644 --- a/c_glib/arrow-flight-glib/common.cpp +++ b/c_glib/arrow-flight-glib/common.cpp @@ -48,7 +48,11 @@ G_BEGIN_DECLS * * #GAFlightStreamChunk is a class for a chunk in stream. 
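A hedged end-to-end sketch of the client side of the new DoPut binding declared above, mirroring the Ruby test added later in this diff. It assumes the read-only "writer"/"reader" GObject properties installed on GAFlightDoPutResult and the existing GArrowRecordBatchWriter API; upload_table and its arguments are illustrative.

#include <arrow-flight-glib/arrow-flight-glib.h>

static gboolean
upload_table(GAFlightClient *client,
             GAFlightDescriptor *descriptor,
             GArrowTable *table,
             GError **error)
{
  GArrowSchema *schema = garrow_table_get_schema(table);
  GAFlightDoPutResult *result =
    gaflight_client_do_put(client, descriptor, schema, NULL, error);
  g_object_unref(schema);
  if (!result) {
    return FALSE;
  }

  GAFlightStreamWriter *writer = NULL;
  GAFlightMetadataReader *reader = NULL;
  /* The writer and reader are exposed as read-only GObject properties. */
  g_object_get(result, "writer", &writer, "reader", &reader, NULL);

  gboolean success =
    garrow_record_batch_writer_write_table(GARROW_RECORD_BATCH_WRITER(writer),
                                           table,
                                           error) &&
    gaflight_stream_writer_done_writing(writer, error);
  if (success) {
    /* Application metadata sent back by the server. */
    GArrowBuffer *metadata = gaflight_metadata_reader_read(reader, error);
    success = (metadata != NULL);
    if (metadata) {
      g_object_unref(metadata);
    }
  }
  if (!garrow_record_batch_writer_close(GARROW_RECORD_BATCH_WRITER(writer),
                                        success ? error : NULL)) {
    success = FALSE;
  }

  g_object_unref(writer);
  g_object_unref(reader);
  g_object_unref(result);
  return success;
}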
* - * #GAFlightRecordBatchReader is a class for reading record batches. + * #GAFlightRecordBatchReader is an abstract class for reading record + * batches with metadata. + * + * #GAFlightRecordBatchWeriter is an abstract class for + * writing record batches with metadata. * * Since: 5.0.0 */ @@ -1172,13 +1176,13 @@ typedef struct GAFlightRecordBatchReaderPrivate_ } GAFlightRecordBatchReaderPrivate; enum { - PROP_READER = 1, - PROP_IS_OWNER, + PROP_RECORD_BATCH_READER_READER = 1, + PROP_RECORD_BATCH_READER_IS_OWNER, }; -G_DEFINE_TYPE_WITH_PRIVATE(GAFlightRecordBatchReader, - gaflight_record_batch_reader, - G_TYPE_OBJECT) +G_DEFINE_ABSTRACT_TYPE_WITH_PRIVATE(GAFlightRecordBatchReader, + gaflight_record_batch_reader, + G_TYPE_OBJECT) #define GAFLIGHT_RECORD_BATCH_READER_GET_PRIVATE(obj) \ static_cast( \ @@ -1192,7 +1196,7 @@ gaflight_record_batch_reader_finalize(GObject *object) if (priv->is_owner) { delete priv->reader; } - G_OBJECT_CLASS(gaflight_info_parent_class)->finalize(object); + G_OBJECT_CLASS(gaflight_record_batch_reader_parent_class)->finalize(object); } static void @@ -1204,11 +1208,11 @@ gaflight_record_batch_reader_set_property(GObject *object, auto priv = GAFLIGHT_RECORD_BATCH_READER_GET_PRIVATE(object); switch (prop_id) { - case PROP_READER: + case PROP_RECORD_BATCH_READER_READER: priv->reader = static_cast(g_value_get_pointer(value)); break; - case PROP_IS_OWNER: + case PROP_RECORD_BATCH_READER_IS_OWNER: priv->is_owner = g_value_get_boolean(value); break; default: @@ -1236,7 +1240,7 @@ gaflight_record_batch_reader_class_init(GAFlightRecordBatchReaderClass *klass) nullptr, nullptr, static_cast(G_PARAM_WRITABLE | G_PARAM_CONSTRUCT_ONLY)); - g_object_class_install_property(gobject_class, PROP_READER, spec); + g_object_class_install_property(gobject_class, PROP_RECORD_BATCH_READER_READER, spec); spec = g_param_spec_boolean( "is-owner", @@ -1244,7 +1248,7 @@ gaflight_record_batch_reader_class_init(GAFlightRecordBatchReaderClass *klass) nullptr, TRUE, static_cast(G_PARAM_WRITABLE | G_PARAM_CONSTRUCT_ONLY)); - g_object_class_install_property(gobject_class, PROP_IS_OWNER, spec); + g_object_class_install_property(gobject_class, PROP_RECORD_BATCH_READER_IS_OWNER, spec); } /** @@ -1296,6 +1300,108 @@ gaflight_record_batch_reader_read_all(GAFlightRecordBatchReader *reader, GError } } +G_DEFINE_ABSTRACT_TYPE(GAFlightRecordBatchWriter, + gaflight_record_batch_writer, + GARROW_TYPE_RECORD_BATCH_WRITER) + +static void +gaflight_record_batch_writer_init(GAFlightRecordBatchWriter *object) +{ +} + +static void +gaflight_record_batch_writer_class_init(GAFlightRecordBatchWriterClass *klass) +{ +} + +/** + * gaflight_record_batch_writer_begin: + * @writer: A #GAFlightRecordBatchWriter. + * @schema: A #GArrowSchema. + * @options: (nullable): A #GArrowWriteOptions. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Begins writing data with the given schema. Only used with + * `DoExchange`. + * + * Returns: %TRUE on success, %FALSE on error. 
+ * + * Since: 18.0.0 + */ +gboolean +gaflight_record_batch_writer_begin(GAFlightRecordBatchWriter *writer, + GArrowSchema *schema, + GArrowWriteOptions *options, + GError **error) +{ + auto flight_writer = std::static_pointer_cast( + garrow_record_batch_writer_get_raw(GARROW_RECORD_BATCH_WRITER(writer))); + auto arrow_schema = garrow_schema_get_raw(schema); + arrow::ipc::IpcWriteOptions arrow_write_options; + if (options) { + arrow_write_options = *garrow_write_options_get_raw(options); + } else { + arrow_write_options = arrow::ipc::IpcWriteOptions::Defaults(); + } + return garrow::check(error, + flight_writer->Begin(arrow_schema, arrow_write_options), + "[flight-record-batch-writer][begin]"); +} + +/** + * gaflight_record_batch_writer_write_metadata: + * @writer: A #GAFlightRecordBatchWriter. + * @metadata: A #GArrowBuffer. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Write metadata. + * + * Returns: %TRUE on success, %FALSE on error. + * + * Since: 18.0.0 + */ +gboolean +gaflight_record_batch_writer_write_metadata(GAFlightRecordBatchWriter *writer, + GArrowBuffer *metadata, + GError **error) +{ + auto flight_writer = std::static_pointer_cast( + garrow_record_batch_writer_get_raw(GARROW_RECORD_BATCH_WRITER(writer))); + auto arrow_metadata = garrow_buffer_get_raw(metadata); + return garrow::check(error, + flight_writer->WriteMetadata(arrow_metadata), + "[flight-record-batch-writer][write-metadata]"); +} + +/** + * gaflight_record_batch_writer_write_record_batch: + * @writer: A #GAFlightRecordBatchWriter. + * @record_batch: A #GArrowRecordBatch. + * @metadata: (nullable): A #GArrowBuffer. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Write a record batch with metadata. + * + * Returns: %TRUE on success, %FALSE on error. 
+ * + * Since: 18.0.0 + */ +gboolean +gaflight_record_batch_writer_write_record_batch(GAFlightRecordBatchWriter *writer, + GArrowRecordBatch *record_batch, + GArrowBuffer *metadata, + GError **error) +{ + auto flight_writer = std::static_pointer_cast( + garrow_record_batch_writer_get_raw(GARROW_RECORD_BATCH_WRITER(writer))); + auto arrow_record_batch = garrow_record_batch_get_raw(record_batch); + auto arrow_metadata = garrow_buffer_get_raw(metadata); + return garrow::check( + error, + flight_writer->WriteWithMetadata(*arrow_record_batch, arrow_metadata), + "[flight-record-batch-writer][write]"); +} + G_END_DECLS GAFlightCriteria * diff --git a/c_glib/arrow-flight-glib/common.h b/c_glib/arrow-flight-glib/common.h index b1d89f79c357e..726132fe4921b 100644 --- a/c_glib/arrow-flight-glib/common.h +++ b/c_glib/arrow-flight-glib/common.h @@ -232,4 +232,36 @@ GAFLIGHT_AVAILABLE_IN_6_0 GArrowTable * gaflight_record_batch_reader_read_all(GAFlightRecordBatchReader *reader, GError **error); +#define GAFLIGHT_TYPE_RECORD_BATCH_WRITER (gaflight_record_batch_writer_get_type()) +GAFLIGHT_AVAILABLE_IN_18_0 +G_DECLARE_DERIVABLE_TYPE(GAFlightRecordBatchWriter, + gaflight_record_batch_writer, + GAFLIGHT, + RECORD_BATCH_WRITER, + GArrowRecordBatchWriter) +struct _GAFlightRecordBatchWriterClass +{ + GArrowRecordBatchWriterClass parent_class; +}; + +GAFLIGHT_AVAILABLE_IN_18_0 +gboolean +gaflight_record_batch_writer_begin(GAFlightRecordBatchWriter *writer, + GArrowSchema *schema, + GArrowWriteOptions *options, + GError **error); + +GAFLIGHT_AVAILABLE_IN_18_0 +gboolean +gaflight_record_batch_writer_write_metadata(GAFlightRecordBatchWriter *writer, + GArrowBuffer *metadata, + GError **error); + +GAFLIGHT_AVAILABLE_IN_18_0 +gboolean +gaflight_record_batch_writer_write_record_batch(GAFlightRecordBatchWriter *writer, + GArrowRecordBatch *record_batch, + GArrowBuffer *metadata, + GError **error); + G_END_DECLS diff --git a/c_glib/arrow-flight-glib/common.hpp b/c_glib/arrow-flight-glib/common.hpp index db56fff579baf..ae5a7703397dd 100644 --- a/c_glib/arrow-flight-glib/common.hpp +++ b/c_glib/arrow-flight-glib/common.hpp @@ -79,3 +79,7 @@ gaflight_stream_chunk_get_raw(GAFlightStreamChunk *chunk); GAFLIGHT_EXTERN arrow::flight::MetadataRecordBatchReader * gaflight_record_batch_reader_get_raw(GAFlightRecordBatchReader *reader); + +GAFLIGHT_EXTERN +arrow::flight::MetadataRecordBatchWriter * +gaflight_record_batch_writer_get_raw(GAFlightRecordBatchWriter *writer); diff --git a/c_glib/arrow-flight-glib/server.cpp b/c_glib/arrow-flight-glib/server.cpp index f7444918e90f6..2feeb853e2c51 100644 --- a/c_glib/arrow-flight-glib/server.cpp +++ b/c_glib/arrow-flight-glib/server.cpp @@ -45,6 +45,9 @@ G_BEGIN_DECLS * client. Also allows reading application-defined metadata via the * Flight protocol. * + * #GAFlightMetadataWriter is a class for sending application-specific + * metadata back to client during an upload. + * * #GAFlightServerAuthSender is a class for sending messages to the * client during an authentication handshake. 
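A short sketch of driving the GAFlightRecordBatchWriter API added in common.cpp above. How the writer itself is obtained (from DoPut, or a future DoExchange binding) is outside this snippet; send_batch_with_metadata and the literal metadata payloads are illustrative.

static gboolean
send_batch_with_metadata(GAFlightRecordBatchWriter *writer,
                         GArrowSchema *schema,
                         GArrowRecordBatch *record_batch,
                         GError **error)
{
  /* Announce the schema; per the documentation above this is only
   * needed for DoExchange. */
  if (!gaflight_record_batch_writer_begin(writer, schema, NULL, error)) {
    return FALSE;
  }

  /* Standalone application metadata... */
  GArrowBuffer *note = garrow_buffer_new((const guint8 *)"checkpoint", 10);
  gboolean success =
    gaflight_record_batch_writer_write_metadata(writer, note, error);
  g_object_unref(note);
  if (!success) {
    return FALSE;
  }

  /* ...or a record batch with metadata attached to it. */
  GArrowBuffer *tag = garrow_buffer_new((const guint8 *)"batch-0", 7);
  success = gaflight_record_batch_writer_write_record_batch(writer,
                                                            record_batch,
                                                            tag,
                                                            error);
  g_object_unref(tag);
  return success;
}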
* @@ -290,6 +293,98 @@ gaflight_message_reader_get_descriptor(GAFlightMessageReader *reader) return gaflight_descriptor_new_raw(&flight_descriptor); } +struct GAFlightMetadataWriterPrivate +{ + arrow::flight::FlightMetadataWriter *writer; +}; + +enum { + PROP_WRITER = 1, +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GAFlightMetadataWriter, + gaflight_metadata_writer, + G_TYPE_OBJECT) + +#define GAFLIGHT_METADATA_WRITER_GET_PRIVATE(object) \ + static_cast( \ + gaflight_metadata_writer_get_instance_private(GAFLIGHT_METADATA_WRITER(object))) + +static void +gaflight_metadata_writer_finalize(GObject *object) +{ + auto priv = GAFLIGHT_METADATA_WRITER_GET_PRIVATE(object); + + delete priv->writer; + + G_OBJECT_CLASS(gaflight_metadata_writer_parent_class)->finalize(object); +} + +static void +gaflight_metadata_writer_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GAFLIGHT_METADATA_WRITER_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_WRITER: + priv->writer = + static_cast(g_value_get_pointer(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +gaflight_metadata_writer_init(GAFlightMetadataWriter *object) +{ +} + +static void +gaflight_metadata_writer_class_init(GAFlightMetadataWriterClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->finalize = gaflight_metadata_writer_finalize; + gobject_class->set_property = gaflight_metadata_writer_set_property; + + GParamSpec *spec; + spec = g_param_spec_pointer( + "writer", + nullptr, + nullptr, + static_cast(G_PARAM_WRITABLE | G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_WRITER, spec); +} + +/** + * gaflight_metadata_writer_write: + * @writer: A #GAFlightMetadataWriter. + * @metadata: A #GArrowBuffer to be sent. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Writes metadata to the client. + * + * Returns: %TRUE on success, %FALSE on error. + * + * Since: 18.0.0 + */ +gboolean +gaflight_metadata_writer_write(GAFlightMetadataWriter *writer, + GArrowBuffer *metadata, + GError **error) +{ + auto flight_writer = gaflight_metadata_writer_get_raw(writer); + auto flight_metadata = garrow_buffer_get_raw(metadata); + return garrow::check(error, + flight_writer->WriteMetadata(*flight_metadata), + "[flight-metadata-writer][write]"); +} + struct GAFlightServerCallContextPrivate { arrow::flight::ServerCallContext *call_context; @@ -366,8 +461,8 @@ gaflight_server_call_context_class_init(GAFlightServerCallContextClass *klass) /** * gaflight_server_call_context_foreach_incoming_header: * @context: A #GAFlightServerCallContext. - * @func: (scope call): The user's callback function. - * @user_data: (closure): Data for @func. + * @func: (scope call) (closure user_data): The user's callback function. + * @user_data: Data for @func. * * Iterates over all incoming headers. 
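A sketch of what a C handler for the do_put virtual function (declared later in this diff) could look like, mirroring the Ruby helper server: it drains the uploaded stream and acknowledges with application metadata through the new GAFlightMetadataWriter. The function name example_do_put is illustrative, and wiring it into a GAFlightServer subclass's class_init is omitted.

static gboolean
example_do_put(GAFlightServer *server,
               GAFlightServerCallContext *context,
               GAFlightMessageReader *reader,
               GAFlightMetadataWriter *writer,
               GError **error)
{
  /* Drain the uploaded stream into a table. */
  GArrowTable *table =
    gaflight_record_batch_reader_read_all(GAFLIGHT_RECORD_BATCH_READER(reader),
                                          error);
  if (!table) {
    return FALSE;
  }
  /* A real handler would store or process the table here. */
  g_object_unref(table);

  /* Acknowledge the upload with application metadata. */
  GArrowBuffer *ack = garrow_buffer_new((const guint8 *)"done", 4);
  gboolean success = gaflight_metadata_writer_write(writer, ack, error);
  g_object_unref(ack);
  return success;
}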
* @@ -1034,6 +1129,34 @@ namespace gaflight { return arrow::Status::OK(); } + arrow::Status + DoPut(const arrow::flight::ServerCallContext &context, + std::unique_ptr reader, + std::unique_ptr writer) override + { + auto gacontext = gaflight_server_call_context_new_raw(&context); + auto gareader = gaflight_message_reader_new_raw(reader.release(), TRUE); + auto gawriter = gaflight_metadata_writer_new_raw(writer.release()); + GError *gerror = nullptr; + auto success = + gaflight_server_do_put(gaserver_, gacontext, gareader, gawriter, &gerror); + g_object_unref(gawriter); + g_object_unref(gareader); + g_object_unref(gacontext); + if (!success && !gerror) { + g_set_error(&gerror, + GARROW_ERROR, + GARROW_ERROR_UNKNOWN, + "GAFlightServerClass::do_put() returns FALSE but error isn't set"); + } + if (gerror) { + return garrow_error_to_status(gerror, + arrow::StatusCode::UnknownError, + "[flight-server][do-put]"); + } + return arrow::Status::OK(); + } + private: GAFlightServer *gaserver_; }; @@ -1228,6 +1351,35 @@ gaflight_server_do_get(GAFlightServer *server, return (*(klass->do_get))(server, context, ticket, error); } +/** + * gaflight_server_do_put: + * @server: A #GAFlightServer. + * @context: A #GAFlightServerCallContext. + * @reader: A #GAFlightMessageReader. + * @writer: A #GAFlightMetadataWriter. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Processes a stream of IPC payloads sent from a client. + * + * Returns: %TRUE on success, %FALSE on error. + * + * Since: 18.0.0 + */ +gboolean +gaflight_server_do_put(GAFlightServer *server, + GAFlightServerCallContext *context, + GAFlightMessageReader *reader, + GAFlightMetadataWriter *writer, + GError **error) +{ + auto klass = GAFLIGHT_SERVER_GET_CLASS(server); + if (!(klass && klass->do_put)) { + g_set_error(error, GARROW_ERROR, GARROW_ERROR_NOT_IMPLEMENTED, "not implemented"); + return false; + } + return klass->do_put(server, context, reader, writer, error); +} + G_END_DECLS arrow::flight::FlightDataStream * @@ -1257,6 +1409,20 @@ gaflight_message_reader_get_raw(GAFlightMessageReader *reader) return static_cast(flight_reader); } +GAFlightMetadataWriter * +gaflight_metadata_writer_new_raw(arrow::flight::FlightMetadataWriter *flight_writer) +{ + return GAFLIGHT_METADATA_WRITER( + g_object_new(GAFLIGHT_TYPE_METADATA_WRITER, "writer", flight_writer, nullptr)); +} + +arrow::flight::FlightMetadataWriter * +gaflight_metadata_writer_get_raw(GAFlightMetadataWriter *writer) +{ + auto priv = GAFLIGHT_METADATA_WRITER_GET_PRIVATE(writer); + return priv->writer; +} + GAFlightServerCallContext * gaflight_server_call_context_new_raw( const arrow::flight::ServerCallContext *flight_call_context) diff --git a/c_glib/arrow-flight-glib/server.h b/c_glib/arrow-flight-glib/server.h index 7e594febb172f..e3a469098b32c 100644 --- a/c_glib/arrow-flight-glib/server.h +++ b/c_glib/arrow-flight-glib/server.h @@ -65,6 +65,21 @@ GAFLIGHT_AVAILABLE_IN_14_0 GAFlightDescriptor * gaflight_message_reader_get_descriptor(GAFlightMessageReader *reader); +#define GAFLIGHT_TYPE_METADATA_WRITER (gaflight_metadata_writer_get_type()) +GAFLIGHT_AVAILABLE_IN_18_0 +G_DECLARE_DERIVABLE_TYPE( + GAFlightMetadataWriter, gaflight_metadata_writer, GAFLIGHT, METADATA_WRITER, GObject) +struct _GAFlightMetadataWriterClass +{ + GObjectClass parent_class; +}; + +GAFLIGHT_AVAILABLE_IN_18_0 +gboolean +gaflight_metadata_writer_write(GAFlightMetadataWriter *writer, + GArrowBuffer *metadata, + GError **error); + #define GAFLIGHT_TYPE_SERVER_CALL_CONTEXT 
(gaflight_server_call_context_get_type()) GAFLIGHT_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE(GAFlightServerCallContext, @@ -199,6 +214,7 @@ G_DECLARE_DERIVABLE_TYPE(GAFlightServer, gaflight_server, GAFLIGHT, SERVER, GObj * GAFlightServerClass: * @list_flights: A virtual function to implement `ListFlights` API. * @do_get: A virtual function to implement `DoGet` API. + * @do_put: A virtual function to implement `DoPut` API. * * Since: 5.0.0 */ @@ -218,6 +234,11 @@ struct _GAFlightServerClass GAFlightServerCallContext *context, GAFlightTicket *ticket, GError **error); + gboolean (*do_put)(GAFlightServer *server, + GAFlightServerCallContext *context, + GAFlightMessageReader *reader, + GAFlightMetadataWriter *writer, + GError **error); }; GAFLIGHT_AVAILABLE_IN_5_0 @@ -254,4 +275,12 @@ gaflight_server_do_get(GAFlightServer *server, GAFlightTicket *ticket, GError **error); +GAFLIGHT_AVAILABLE_IN_18_0 +gboolean +gaflight_server_do_put(GAFlightServer *server, + GAFlightServerCallContext *context, + GAFlightMessageReader *reader, + GAFlightMetadataWriter *writer, + GError **error); + G_END_DECLS diff --git a/c_glib/arrow-flight-glib/server.hpp b/c_glib/arrow-flight-glib/server.hpp index ec4815751c8d8..f68eef83781ec 100644 --- a/c_glib/arrow-flight-glib/server.hpp +++ b/c_glib/arrow-flight-glib/server.hpp @@ -36,6 +36,14 @@ GAFLIGHT_EXTERN arrow::flight::FlightMessageReader * gaflight_message_reader_get_raw(GAFlightMessageReader *reader); +GAFLIGHT_EXTERN +GAFlightMetadataWriter * +gaflight_metadata_writer_new_raw(arrow::flight::FlightMetadataWriter *flight_writer); + +GAFLIGHT_EXTERN +arrow::flight::FlightMetadataWriter * +gaflight_metadata_writer_get_raw(GAFlightMetadataWriter *writer); + GAFLIGHT_EXTERN GAFlightServerCallContext * gaflight_server_call_context_new_raw( diff --git a/c_glib/arrow-glib/arrow-glib.h b/c_glib/arrow-glib/arrow-glib.h index 7ba20882610e8..272b6ba1dae10 100644 --- a/c_glib/arrow-glib/arrow-glib.h +++ b/c_glib/arrow-glib/arrow-glib.h @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include diff --git a/c_glib/arrow-glib/arrow-glib.hpp b/c_glib/arrow-glib/arrow-glib.hpp index 79e8dcbcce61a..49571eeae4929 100644 --- a/c_glib/arrow-glib/arrow-glib.hpp +++ b/c_glib/arrow-glib/arrow-glib.hpp @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include diff --git a/c_glib/arrow-glib/decoder.cpp b/c_glib/arrow-glib/decoder.cpp new file mode 100644 index 0000000000000..83af6bc484394 --- /dev/null +++ b/c_glib/arrow-glib/decoder.cpp @@ -0,0 +1,607 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +G_BEGIN_DECLS + +/** + * SECTION: decoder + * @section_id: decoder-classes + * @title: Decoder classes + * @include: arrow-glib/arrow-glib.h + * + * #GArrowStreamListener is a class for receiving decoded information + * from #GArrowStreamDecoder. + * + * #GArrowStreamDecoder is a class for decoding record batches in + * stream format from given data chunks. + */ + +struct GArrowStreamListenerPrivate +{ + std::shared_ptr listener; +}; + +G_DEFINE_ABSTRACT_TYPE_WITH_PRIVATE(GArrowStreamListener, + garrow_stream_listener, + G_TYPE_OBJECT); + +#define GARROW_STREAM_LISTENER_GET_PRIVATE(object) \ + static_cast( \ + garrow_stream_listener_get_instance_private(GARROW_STREAM_LISTENER(object))) + +G_END_DECLS + +namespace garrow { + class StreamListener : public arrow::ipc::Listener { + public: + StreamListener(GArrowStreamListener *listener) : listener_(listener) + { + g_object_ref(listener_); + } + ~StreamListener() { g_object_unref(listener_); } + + arrow::Status + OnEOS() override + { + if (!klass()->on_eos) { + return arrow::Status::OK(); + } + + GError *error = nullptr; + if (garrow_stream_listener_on_eos(listener_, &error)) { + return arrow::Status::OK(); + } else { + return garrow_error_to_status(error, + arrow::StatusCode::UnknownError, + "[stream-listener][on-eos]"); + } + } + + arrow::Status + OnRecordBatchWithMetadataDecoded( + arrow::RecordBatchWithMetadata arrow_record_batch_with_metadata) override + { + if (!klass()->on_record_batch_decoded) { + return arrow::Status::OK(); + } + + auto record_batch = + garrow_record_batch_new_raw(&(arrow_record_batch_with_metadata.batch)); + GHashTable *metadata = nullptr; + if (arrow_record_batch_with_metadata.custom_metadata) { + metadata = garrow_internal_hash_table_from_metadata( + arrow_record_batch_with_metadata.custom_metadata); + } + GError *error = nullptr; + auto success = garrow_stream_listener_on_record_batch_decoded(listener_, + record_batch, + metadata, + &error); + g_object_unref(record_batch); + if (metadata) { + g_hash_table_unref(metadata); + } + if (success) { + return arrow::Status::OK(); + } else { + return garrow_error_to_status(error, + arrow::StatusCode::UnknownError, + "[stream-listener][on-record-batch-decoded]"); + } + } + + arrow::Status + OnSchemaDecoded(std::shared_ptr arrow_schema, + std::shared_ptr arrow_filtered_schema) override + { + if (!klass()->on_schema_decoded) { + return arrow::Status::OK(); + } + + auto schema = garrow_schema_new_raw(&arrow_schema); + auto filtered_schema = garrow_schema_new_raw(&arrow_filtered_schema); + GError *error = nullptr; + auto success = garrow_stream_listener_on_schema_decoded(listener_, + schema, + filtered_schema, + &error); + g_object_unref(schema); + g_object_unref(filtered_schema); + if (success) { + return arrow::Status::OK(); + } else { + return garrow_error_to_status(error, + arrow::StatusCode::UnknownError, + "[stream-listener][on-schema-decoded]"); + } + } + + private: + GArrowStreamListener *listener_; + + GArrowStreamListenerClass * + klass() + { + return GARROW_STREAM_LISTENER_GET_CLASS(listener_); + } + }; +}; // namespace garrow + +G_BEGIN_DECLS + +static void +garrow_stream_listener_finalize(GObject *object) +{ + auto priv = GARROW_STREAM_LISTENER_GET_PRIVATE(object); + priv->listener.~shared_ptr(); + G_OBJECT_CLASS(garrow_stream_listener_parent_class)->finalize(object); +} + +static void +garrow_stream_listener_init(GArrowStreamListener *object) +{ + auto priv = 
GARROW_STREAM_LISTENER_GET_PRIVATE(object); + new (&priv->listener) + std::shared_ptr(new garrow::StreamListener(object)); +} + +static void +garrow_stream_listener_class_init(GArrowStreamListenerClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + gobject_class->finalize = garrow_stream_listener_finalize; + + klass->on_eos = nullptr; + klass->on_record_batch_decoded = nullptr; + klass->on_schema_decoded = nullptr; +} + +/** + * garrow_stream_listener_on_eos: + * @listener: A #GArrowStreamListener. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Processes an EOS event. + * + * Returns: %TRUE on success, %FALSE on error. + * + * Since: 18.0.0 + */ +gboolean +garrow_stream_listener_on_eos(GArrowStreamListener *listener, GError **error) +{ + auto klass = GARROW_STREAM_LISTENER_GET_CLASS(listener); + if (!(klass && klass->on_eos)) { + g_set_error(error, + GARROW_ERROR, + GARROW_ERROR_NOT_IMPLEMENTED, + "[stream-listener][on-eos] not implemented"); + return false; + } + return klass->on_eos(listener, error); +} + +/** + * garrow_stream_listener_on_record_batch_decoded: + * @listener: A #GArrowStreamListener. + * @record_batch: A decoded #GArrowRecordBatch. + * @metadata: (element-type utf8 utf8) (nullable): A decoded metadata. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Processes a decoded record batch. + * + * Returns: %TRUE on success, %FALSE on error. + * + * Since: 18.0.0 + */ +gboolean +garrow_stream_listener_on_record_batch_decoded(GArrowStreamListener *listener, + GArrowRecordBatch *record_batch, + GHashTable *metadata, + GError **error) +{ + auto klass = GARROW_STREAM_LISTENER_GET_CLASS(listener); + if (!(klass && klass->on_record_batch_decoded)) { + g_set_error(error, + GARROW_ERROR, + GARROW_ERROR_NOT_IMPLEMENTED, + "[stream-listener][on-record-batch-decoded] not implemented"); + return false; + } + return klass->on_record_batch_decoded(listener, record_batch, metadata, error); +} + +/** + * garrow_stream_listener_on_schema_decoded: + * @listener: A #GArrowStreamListener. + * @schema: A decoded #GArrowSchema. + * @filtered_schema: A decoded #GArrowSchema that only has read fields. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Processes a decoded schema. + * + * Returns: %TRUE on success, %FALSE on error. 
+ * + * Since: 18.0.0 + */ +gboolean +garrow_stream_listener_on_schema_decoded(GArrowStreamListener *listener, + GArrowSchema *schema, + GArrowSchema *filtered_schema, + GError **error) +{ + auto klass = GARROW_STREAM_LISTENER_GET_CLASS(listener); + if (!(klass && klass->on_schema_decoded)) { + g_set_error(error, + GARROW_ERROR, + GARROW_ERROR_NOT_IMPLEMENTED, + "[stream-listener][on-schema-decoded] not implemented"); + return false; + } + return klass->on_schema_decoded(listener, schema, filtered_schema, error); +} + +struct GArrowStreamDecoderPrivate +{ + std::shared_ptr decoder; + GArrowStreamListener *listener; +}; + +enum { + PROP_DECODER = 1, + PROP_LISTENER, +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowStreamDecoder, garrow_stream_decoder, G_TYPE_OBJECT); + +#define GARROW_STREAM_DECODER_GET_PRIVATE(object) \ + static_cast( \ + garrow_stream_decoder_get_instance_private(GARROW_STREAM_DECODER(object))) + +static void +garrow_stream_decoder_finalize(GObject *object) +{ + auto priv = GARROW_STREAM_DECODER_GET_PRIVATE(object); + priv->decoder.~shared_ptr(); + G_OBJECT_CLASS(garrow_stream_decoder_parent_class)->finalize(object); +} + +static void +garrow_stream_decoder_dispose(GObject *object) +{ + auto priv = GARROW_STREAM_DECODER_GET_PRIVATE(object); + + if (priv->listener) { + g_object_unref(priv->listener); + priv->listener = nullptr; + } + + G_OBJECT_CLASS(garrow_stream_decoder_parent_class)->dispose(object); +} + +static void +garrow_stream_decoder_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_STREAM_DECODER_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_DECODER: + priv->decoder = *static_cast *>( + g_value_get_pointer(value)); + break; + case PROP_LISTENER: + priv->listener = GARROW_STREAM_LISTENER(g_value_dup_object(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_stream_decoder_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_STREAM_DECODER_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_LISTENER: + g_value_set_object(value, priv->listener); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_stream_decoder_init(GArrowStreamDecoder *object) +{ + auto priv = GARROW_STREAM_DECODER_GET_PRIVATE(object); + new (&priv->decoder) std::shared_ptr; +} + +static void +garrow_stream_decoder_class_init(GArrowStreamDecoderClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->finalize = garrow_stream_decoder_finalize; + gobject_class->dispose = garrow_stream_decoder_dispose; + gobject_class->set_property = garrow_stream_decoder_set_property; + gobject_class->get_property = garrow_stream_decoder_get_property; + + GParamSpec *spec; + spec = g_param_spec_pointer( + "decoder", + nullptr, + nullptr, + static_cast(G_PARAM_WRITABLE | G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_DECODER, spec); + + /** + * GArrowStreamDecoder:listener: + * + * A listener that receives decoded events. + * + * Since: 18.0.0 + */ + spec = g_param_spec_object( + "listener", + nullptr, + nullptr, + GARROW_TYPE_STREAM_LISTENER, + static_cast(G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_LISTENER, spec); +} + +/** + * garrow_stream_decoder_new: + * @listener: The #GArrowStreamListener that receives decoded events. 
+ * @options: (nullable): The #GArrowReadOptions. + * + * Returns: A newly created #GArrowStreamDecoder. + * + * Since: 18.0.0 + */ +GArrowStreamDecoder * +garrow_stream_decoder_new(GArrowStreamListener *listener, GArrowReadOptions *options) +{ + auto arrow_listener = garrow_stream_listener_get_raw(listener); + arrow::ipc::IpcReadOptions arrow_options; + if (options) { + arrow_options = *garrow_read_options_get_raw(options); + } else { + arrow_options = arrow::ipc::IpcReadOptions::Defaults(); + } + auto arrow_decoder = + std::make_shared(arrow_listener, arrow_options); + return garrow_stream_decoder_new_raw(&arrow_decoder, listener); +} + +/** + * garrow_stream_decoder_consume_bytes: + * @decoder: A #GArrowStreamDecoder. + * @bytes: A #GBytes to be decoded. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Feed data to the decoder as a raw data. + * + * If the decoder can read one or more record batches by the data, the + * decoder calls [vfunc@GArrowStreamListener.on_record_batch_decoded] + * with a decoded record batch multiple times. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 18.0.0 + */ +gboolean +garrow_stream_decoder_consume_bytes(GArrowStreamDecoder *decoder, + GBytes *bytes, + GError **error) +{ + auto arrow_decoder = garrow_stream_decoder_get_raw(decoder); + gsize size; + gconstpointer data = g_bytes_get_data(bytes, &size); + return garrow::check(error, + arrow_decoder->Consume(static_cast(data), size), + "[stream-decoder][consume-bytes]"); +} + +/** + * garrow_stream_decoder_consume_buffer: + * @decoder: A #GArrowStreamDecoder. + * @buffer: A #GArrowBuffer to be decoded. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Feed data to the decoder as a #GArrowBuffer. + * + * If the decoder can read one or more record batches by the data, the + * decoder calls [vfunc@GArrowStreamListener.on_record_batch_decoded] + * with a decoded record batch multiple times. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 18.0.0 + */ +gboolean +garrow_stream_decoder_consume_buffer(GArrowStreamDecoder *decoder, + GArrowBuffer *buffer, + GError **error) +{ + auto arrow_decoder = garrow_stream_decoder_get_raw(decoder); + auto arrow_buffer = garrow_buffer_get_raw(buffer); + return garrow::check(error, + arrow_decoder->Consume(arrow_buffer), + "[stream-decoder][consume-buffer]"); +} + +/** + * garrow_stream_decoder_reset: + * @decoder: A #GArrowStreamDecoder. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Reset the internal status. + * + * You can reuse this decoder for new stream after calling this. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 18.0.0 + */ +gboolean +garrow_stream_decoder_reset(GArrowStreamDecoder *decoder, GError **error) +{ + auto arrow_decoder = garrow_stream_decoder_get_raw(decoder); + return garrow::check(error, arrow_decoder->Reset(), "[stream-decoder][reset]"); +} + +/** + * garrow_stream_decoder_get_schema: + * @decoder: A #GArrowStreamDecoder. + * + * Returns: (nullable) (transfer full): The shared #GArrowSchema of + * the record batches in the stream. 
+ *
+ * Since: 18.0.0
+ */
+GArrowSchema *
+garrow_stream_decoder_get_schema(GArrowStreamDecoder *decoder)
+{
+  auto arrow_decoder = garrow_stream_decoder_get_raw(decoder);
+  auto arrow_schema = arrow_decoder->schema();
+  if (arrow_schema) {
+    return garrow_schema_new_raw(&arrow_schema);
+  } else {
+    return nullptr;
+  }
+}
+
+/**
+ * garrow_stream_decoder_get_next_required_size:
+ * @decoder: A #GArrowStreamDecoder.
+ *
+ * This method is provided for users who want to optimize performance.
+ * Normal users don't need to use this method.
+ *
+ * Here is an example usage for normal users:
+ *
+ *     garrow_stream_decoder_consume_buffer(decoder, buffer1);
+ *     garrow_stream_decoder_consume_buffer(decoder, buffer2);
+ *     garrow_stream_decoder_consume_buffer(decoder, buffer3);
+ *
+ * The decoder has an internal buffer. If consumed data isn't enough to
+ * advance the state of the decoder, consumed data is buffered in
+ * the internal buffer. This causes performance overhead.
+ *
+ * If you pass data of the size returned by
+ * garrow_stream_decoder_get_next_required_size() to each
+ * garrow_stream_decoder_consume_bytes()/garrow_stream_decoder_consume_buffer()
+ * call, the decoder doesn't use its internal buffer, which improves
+ * performance.
+ *
+ * Here is an example usage to avoid using the internal buffer:
+ *
+ *     buffer1 = get_data(garrow_stream_decoder_get_next_required_size(decoder));
+ *     garrow_stream_decoder_consume_buffer(decoder, buffer1);
+ *     buffer2 = get_data(garrow_stream_decoder_get_next_required_size(decoder));
+ *     garrow_stream_decoder_consume_buffer(decoder, buffer2);
+ *
+ * Users can also use this method to avoid creating small chunks. Record
+ * batch data must be contiguous. If users pass small chunks to the
+ * decoder, the decoder needs to concatenate them internally, which
+ * causes performance overhead.
+ *
+ * Here is an example usage to reduce small chunks:
+ *
+ *     GArrowResizableBuffer *buffer = garrow_resizable_buffer_new(1024, NULL);
+ *     while ((small_chunk = get_data(&small_chunk_size))) {
+ *       size_t current_buffer_size = garrow_buffer_get_size(GARROW_BUFFER(buffer));
+ *       garrow_resizable_buffer_resize(buffer,
+ *                                      current_buffer_size + small_chunk_size,
+ *                                      NULL);
+ *       garrow_mutable_buffer_set_data(GARROW_MUTABLE_BUFFER(buffer),
+ *                                      current_buffer_size,
+ *                                      small_chunk,
+ *                                      small_chunk_size,
+ *                                      NULL);
+ *       if (garrow_buffer_get_size(GARROW_BUFFER(buffer)) <
+ *           garrow_stream_decoder_get_next_required_size(decoder)) {
+ *         continue;
+ *       }
+ *       garrow_stream_decoder_consume_buffer(decoder, GARROW_BUFFER(buffer), NULL);
+ *       g_object_unref(buffer);
+ *       buffer = garrow_resizable_buffer_new(1024, NULL);
+ *     }
+ *     if (garrow_buffer_get_size(GARROW_BUFFER(buffer)) > 0) {
+ *       garrow_stream_decoder_consume_buffer(decoder, GARROW_BUFFER(buffer), NULL);
+ *     }
+ *     g_object_unref(buffer);
+ *
+ * Returns: The number of bytes needed to advance the state of
+ * the decoder.
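A hedged C sketch of the new listener/decoder pair: a minimal GArrowStreamListener subclass that counts decoded record batches, fed by a loop that sizes each read with garrow_stream_decoder_get_next_required_size(). It assumes the existing garrow_readable_read_bytes() API for pulling bytes from an input stream; MyListener and decode_stream are illustrative names.

#include <arrow-glib/arrow-glib.h>

#define MY_TYPE_LISTENER (my_listener_get_type())
G_DECLARE_FINAL_TYPE(MyListener, my_listener, MY, LISTENER, GArrowStreamListener)

struct _MyListener
{
  GArrowStreamListener parent_instance;
  guint n_record_batches;
};
G_DEFINE_TYPE(MyListener, my_listener, GARROW_TYPE_STREAM_LISTENER)

static gboolean
my_listener_on_record_batch_decoded(GArrowStreamListener *listener,
                                    GArrowRecordBatch *record_batch,
                                    GHashTable *metadata,
                                    GError **error)
{
  MY_LISTENER(listener)->n_record_batches++;
  return TRUE;
}

static void
my_listener_init(MyListener *listener)
{
}

static void
my_listener_class_init(MyListenerClass *klass)
{
  GARROW_STREAM_LISTENER_CLASS(klass)->on_record_batch_decoded =
    my_listener_on_record_batch_decoded;
}

static gboolean
decode_stream(GArrowInputStream *input, GError **error)
{
  MyListener *listener = g_object_new(MY_TYPE_LISTENER, NULL);
  GArrowStreamDecoder *decoder =
    garrow_stream_decoder_new(GARROW_STREAM_LISTENER(listener), NULL);
  gboolean success = TRUE;
  while (success) {
    /* Read exactly what the decoder needs next to avoid internal buffering. */
    gsize n_required = garrow_stream_decoder_get_next_required_size(decoder);
    GBytes *chunk =
      garrow_readable_read_bytes(GARROW_READABLE(input), n_required, error);
    if (!chunk) {
      success = FALSE;
      break;
    }
    if (g_bytes_get_size(chunk) == 0) {
      g_bytes_unref(chunk);
      break;
    }
    success = garrow_stream_decoder_consume_bytes(decoder, chunk, error);
    g_bytes_unref(chunk);
  }
  g_print("decoded %u record batches\n", listener->n_record_batches);
  g_object_unref(decoder);
  g_object_unref(listener);
  return success;
}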
+ * + * Since: 18.0.0 + */ +gsize +garrow_stream_decoder_get_next_required_size(GArrowStreamDecoder *decoder) +{ + auto arrow_decoder = garrow_stream_decoder_get_raw(decoder); + return arrow_decoder->next_required_size(); +} + +G_END_DECLS + +std::shared_ptr +garrow_stream_listener_get_raw(GArrowStreamListener *listener) +{ + auto priv = GARROW_STREAM_LISTENER_GET_PRIVATE(listener); + return priv->listener; +} + +GArrowStreamDecoder * +garrow_stream_decoder_new_raw(std::shared_ptr *arrow_decoder, + GArrowStreamListener *listener) +{ + return GARROW_STREAM_DECODER(g_object_new(GARROW_TYPE_STREAM_DECODER, + "decoder", + arrow_decoder, + "listener", + listener, + nullptr)); +} + +std::shared_ptr +garrow_stream_decoder_get_raw(GArrowStreamDecoder *decoder) +{ + auto priv = GARROW_STREAM_DECODER_GET_PRIVATE(decoder); + return priv->decoder; +} diff --git a/c_glib/arrow-glib/decoder.h b/c_glib/arrow-glib/decoder.h new file mode 100644 index 0000000000000..2ac0efbabfc7b --- /dev/null +++ b/c_glib/arrow-glib/decoder.h @@ -0,0 +1,96 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#pragma once + +#include +#include + +G_BEGIN_DECLS + +#define GARROW_TYPE_STREAM_LISTENER (garrow_stream_listener_get_type()) +GARROW_AVAILABLE_IN_18_0 +G_DECLARE_DERIVABLE_TYPE( + GArrowStreamListener, garrow_stream_listener, GARROW, STREAM_LISTENER, GObject) +struct _GArrowStreamListenerClass +{ + GObjectClass parent_class; + + gboolean (*on_eos)(GArrowStreamListener *listener, GError **error); + gboolean (*on_record_batch_decoded)(GArrowStreamListener *listener, + GArrowRecordBatch *record_batch, + GHashTable *metadata, + GError **error); + gboolean (*on_schema_decoded)(GArrowStreamListener *listener, + GArrowSchema *schema, + GArrowSchema *filtered_schema, + GError **error); +}; + +GARROW_AVAILABLE_IN_18_0 +gboolean +garrow_stream_listener_on_eos(GArrowStreamListener *listener, GError **error); + +GARROW_AVAILABLE_IN_18_0 +gboolean +garrow_stream_listener_on_record_batch_decoded(GArrowStreamListener *listener, + GArrowRecordBatch *record_batch, + GHashTable *metadata, + GError **error); + +GARROW_AVAILABLE_IN_18_0 +gboolean +garrow_stream_listener_on_schema_decoded(GArrowStreamListener *listener, + GArrowSchema *schema, + GArrowSchema *filtered_schema, + GError **error); + +#define GARROW_TYPE_STREAM_DECODER (garrow_stream_decoder_get_type()) +GARROW_AVAILABLE_IN_18_0 +G_DECLARE_DERIVABLE_TYPE( + GArrowStreamDecoder, garrow_stream_decoder, GARROW, STREAM_DECODER, GObject) +struct _GArrowStreamDecoderClass +{ + GObjectClass parent_class; +}; + +GARROW_AVAILABLE_IN_18_0 +GArrowStreamDecoder * +garrow_stream_decoder_new(GArrowStreamListener *listener, GArrowReadOptions *options); +GARROW_AVAILABLE_IN_18_0 +gboolean +garrow_stream_decoder_consume_bytes(GArrowStreamDecoder *decoder, + GBytes *bytes, + GError **error); +GARROW_AVAILABLE_IN_18_0 +gboolean +garrow_stream_decoder_consume_buffer(GArrowStreamDecoder *decoder, + GArrowBuffer *buffer, + GError **error); +GARROW_AVAILABLE_IN_18_0 +gboolean +garrow_stream_decoder_reset(GArrowStreamDecoder *decoder, GError **error); +GARROW_AVAILABLE_IN_18_0 +GArrowSchema * +garrow_stream_decoder_get_schema(GArrowStreamDecoder *decoder); +GARROW_AVAILABLE_IN_18_0 +gsize +garrow_stream_decoder_get_next_required_size(GArrowStreamDecoder *decoder); + +G_END_DECLS diff --git a/c_glib/arrow-glib/decoder.hpp b/c_glib/arrow-glib/decoder.hpp new file mode 100644 index 0000000000000..24b329867c685 --- /dev/null +++ b/c_glib/arrow-glib/decoder.hpp @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#pragma once + +#include +#include + +#include + +GARROW_EXTERN +std::shared_ptr +garrow_stream_listener_get_raw(GArrowStreamListener *listener); + +GARROW_EXTERN +GArrowStreamDecoder * +garrow_stream_decoder_new_raw(std::shared_ptr *arrow_decoder, + GArrowStreamListener *listener); + +GARROW_EXTERN +std::shared_ptr +garrow_stream_decoder_get_raw(GArrowStreamDecoder *decoder); diff --git a/c_glib/arrow-glib/file-system.cpp b/c_glib/arrow-glib/file-system.cpp index b6efa2b872635..9ba494e405957 100644 --- a/c_glib/arrow-glib/file-system.cpp +++ b/c_glib/arrow-glib/file-system.cpp @@ -56,6 +56,8 @@ G_BEGIN_DECLS * #GArrowS3FileSystem is a class for S3-backed file system. * * #GArrowGCSFileSystem is a class for GCS-backed file system. + * + * #GArrowAzureFileSystem is a class for Azure-backed file system. */ /* arrow::fs::FileInfo */ @@ -1561,6 +1563,18 @@ garrow_gcs_file_system_class_init(GArrowGCSFileSystemClass *klass) { } +G_DEFINE_TYPE(GArrowAzureFileSystem, garrow_azure_file_system, GARROW_TYPE_FILE_SYSTEM) + +static void +garrow_azure_file_system_init(GArrowAzureFileSystem *file_system) +{ +} + +static void +garrow_azure_file_system_class_init(GArrowAzureFileSystemClass *klass) +{ +} + G_END_DECLS GArrowFileInfo * @@ -1592,6 +1606,8 @@ garrow_file_system_new_raw(std::shared_ptr *arrow_file_sy file_system_type = GARROW_TYPE_S3_FILE_SYSTEM; } else if (type_name == "gcs") { file_system_type = GARROW_TYPE_GCS_FILE_SYSTEM; + } else if (type_name == "abfs") { + file_system_type = GARROW_TYPE_AZURE_FILE_SYSTEM; } else if (type_name == "mock") { file_system_type = GARROW_TYPE_MOCK_FILE_SYSTEM; } diff --git a/c_glib/arrow-glib/file-system.h b/c_glib/arrow-glib/file-system.h index 2e500672e145c..9a903c6af68cf 100644 --- a/c_glib/arrow-glib/file-system.h +++ b/c_glib/arrow-glib/file-system.h @@ -337,4 +337,16 @@ struct _GArrowGCSFileSystemClass GArrowFileSystemClass parent_class; }; +#define GARROW_TYPE_AZURE_FILE_SYSTEM (garrow_azure_file_system_get_type()) +GARROW_AVAILABLE_IN_18_0 +G_DECLARE_DERIVABLE_TYPE(GArrowAzureFileSystem, + garrow_azure_file_system, + GARROW, + AZURE_FILE_SYSTEM, + GArrowFileSystem) +struct _GArrowAzureFileSystemClass +{ + GArrowFileSystemClass parent_class; +}; + G_END_DECLS diff --git a/c_glib/arrow-glib/internal-hash-table.hpp b/c_glib/arrow-glib/internal-hash-table.hpp index 27ec060994c98..2e0a72561a7d8 100644 --- a/c_glib/arrow-glib/internal-hash-table.hpp +++ b/c_glib/arrow-glib/internal-hash-table.hpp @@ -37,3 +37,21 @@ garrow_internal_hash_table_to_metadata(GHashTable *metadata) &arrow_metadata); return arrow_metadata; } + +static inline GHashTable * +garrow_internal_hash_table_from_metadata( + const std::shared_ptr &arrow_metadata) +{ + auto metadata = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, g_free); + const auto &keys = arrow_metadata->keys(); + const auto &values = arrow_metadata->values(); + auto n = arrow_metadata->size(); + for (int64_t i = 0; i < n; ++i) { + const auto &key = keys[i]; + const auto &value = values[i]; + g_hash_table_insert(metadata, + g_strndup(key.data(), key.size()), + g_strndup(value.data(), value.size())); + } + return metadata; +} diff --git a/c_glib/arrow-glib/meson.build b/c_glib/arrow-glib/meson.build index 36a8274513ed2..854988e348986 100644 --- a/c_glib/arrow-glib/meson.build +++ b/c_glib/arrow-glib/meson.build @@ -28,6 +28,7 @@ sources = files( 'composite-data-type.cpp', 'datum.cpp', 'decimal.cpp', + 'decoder.cpp', 'error.cpp', 'expression.cpp', 'field.cpp', @@ -91,6 +92,7 @@ c_headers = files( 
'data-type.h', 'datum.h', 'decimal.h', + 'decoder.h', 'error.h', 'expression.h', 'field.h', @@ -153,6 +155,7 @@ cpp_headers = files( 'data-type.hpp', 'datum.hpp', 'decimal.hpp', + 'decoder.hpp', 'error.hpp', 'expression.hpp', 'field.hpp', diff --git a/c_glib/arrow-glib/reader.cpp b/c_glib/arrow-glib/reader.cpp index 8a1c3722d4a0f..9fe9d9d1b3199 100644 --- a/c_glib/arrow-glib/reader.cpp +++ b/c_glib/arrow-glib/reader.cpp @@ -668,10 +668,10 @@ garrow_record_batch_file_reader_read_record_batch(GArrowRecordBatchFileReader *r } } -typedef struct GArrowFeatherFileReaderPrivate_ +struct GArrowFeatherFileReaderPrivate { std::shared_ptr feather_reader; -} GArrowFeatherFileReaderPrivate; +}; enum { PROP_FEATHER_READER = 1, @@ -714,22 +714,11 @@ garrow_feather_file_reader_set_property(GObject *object, } } -static void -garrow_feather_file_reader_get_property(GObject *object, - guint prop_id, - GValue *value, - GParamSpec *pspec) -{ - switch (prop_id) { - default: - G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); - break; - } -} - static void garrow_feather_file_reader_init(GArrowFeatherFileReader *object) { + auto priv = GARROW_FEATHER_FILE_READER_GET_PRIVATE(object); + new (&priv->feather_reader) std::shared_ptr; } static void @@ -739,7 +728,6 @@ garrow_feather_file_reader_class_init(GArrowFeatherFileReaderClass *klass) gobject_class->finalize = garrow_feather_file_reader_finalize; gobject_class->set_property = garrow_feather_file_reader_set_property; - gobject_class->get_property = garrow_feather_file_reader_get_property; GParamSpec *spec; spec = g_param_spec_pointer( diff --git a/c_glib/arrow-glib/writer.cpp b/c_glib/arrow-glib/writer.cpp index b0321d51b3ba4..08af1c7976965 100644 --- a/c_glib/arrow-glib/writer.cpp +++ b/c_glib/arrow-glib/writer.cpp @@ -45,14 +45,14 @@ G_BEGIN_DECLS * batches in file format into output. */ -typedef struct GArrowRecordBatchWriterPrivate_ +struct GArrowRecordBatchWriterPrivate { std::shared_ptr record_batch_writer; -} GArrowRecordBatchWriterPrivate; + bool is_closed; +}; enum { - PROP_0, - PROP_RECORD_BATCH_WRITER + PROP_RECORD_BATCH_WRITER = 1, }; G_DEFINE_TYPE_WITH_PRIVATE(GArrowRecordBatchWriter, @@ -111,6 +111,7 @@ garrow_record_batch_writer_init(GArrowRecordBatchWriter *object) { auto priv = GARROW_RECORD_BATCH_WRITER_GET_PRIVATE(object); new (&priv->record_batch_writer) std::shared_ptr; + priv->is_closed = false; } static void @@ -193,7 +194,27 @@ garrow_record_batch_writer_close(GArrowRecordBatchWriter *writer, GError **error auto arrow_writer = garrow_record_batch_writer_get_raw(writer); auto status = arrow_writer->Close(); - return garrow_error_check(error, status, "[record-batch-writer][close]"); + auto success = garrow_error_check(error, status, "[record-batch-writer][close]"); + if (success) { + auto priv = GARROW_RECORD_BATCH_WRITER_GET_PRIVATE(writer); + priv->is_closed = true; + } + return success; +} + +/** + * garrow_record_batch_writer_is_closed: + * @writer: A #GArrowRecordBatchWriter. + * + * Returns: %TRUE if the writer is closed, %FALSE otherwise. 
+ * + * Since: 18.0.0 + */ +gboolean +garrow_record_batch_writer_is_closed(GArrowRecordBatchWriter *writer) +{ + auto priv = GARROW_RECORD_BATCH_WRITER_GET_PRIVATE(writer); + return priv->is_closed; } G_DEFINE_TYPE(GArrowRecordBatchStreamWriter, diff --git a/c_glib/arrow-glib/writer.h b/c_glib/arrow-glib/writer.h index 46bbdddec8c9d..cea8390d9028f 100644 --- a/c_glib/arrow-glib/writer.h +++ b/c_glib/arrow-glib/writer.h @@ -53,6 +53,10 @@ GARROW_AVAILABLE_IN_ALL gboolean garrow_record_batch_writer_close(GArrowRecordBatchWriter *writer, GError **error); +GARROW_AVAILABLE_IN_18_0 +gboolean +garrow_record_batch_writer_is_closed(GArrowRecordBatchWriter *writer); + #define GARROW_TYPE_RECORD_BATCH_STREAM_WRITER \ (garrow_record_batch_stream_writer_get_type()) GARROW_AVAILABLE_IN_ALL diff --git a/c_glib/arrow-glib/writer.hpp b/c_glib/arrow-glib/writer.hpp index aa87ffe77d79b..1d85ac52f88d1 100644 --- a/c_glib/arrow-glib/writer.hpp +++ b/c_glib/arrow-glib/writer.hpp @@ -25,16 +25,20 @@ #include +GARROW_AVAILABLE_IN_ALL GArrowRecordBatchWriter * garrow_record_batch_writer_new_raw( std::shared_ptr *arrow_writer); +GARROW_AVAILABLE_IN_ALL std::shared_ptr garrow_record_batch_writer_get_raw(GArrowRecordBatchWriter *writer); +GARROW_AVAILABLE_IN_ALL GArrowRecordBatchStreamWriter * garrow_record_batch_stream_writer_new_raw( std::shared_ptr *arrow_writer); +GARROW_AVAILABLE_IN_ALL GArrowRecordBatchFileWriter * garrow_record_batch_file_writer_new_raw( std::shared_ptr *arrow_writer); diff --git a/c_glib/meson.build b/c_glib/meson.build index 06aa5b941e77c..214c57747033e 100644 --- a/c_glib/meson.build +++ b/c_glib/meson.build @@ -35,7 +35,7 @@ project('arrow-glib', 'c', 'cpp', # * 22.04: 0.61.2 meson_version: '>=0.53.2') -version = '17.0.0-SNAPSHOT' +version = '18.0.0-SNAPSHOT' if version.endswith('-SNAPSHOT') version_numbers = version.split('-')[0].split('.') version_tag = version.split('-')[1] diff --git a/c_glib/parquet-glib/arrow-file-writer.cpp b/c_glib/parquet-glib/arrow-file-writer.cpp index b6f019ed27d46..2b8e2bdeac026 100644 --- a/c_glib/parquet-glib/arrow-file-writer.cpp +++ b/c_glib/parquet-glib/arrow-file-writer.cpp @@ -316,14 +316,13 @@ gparquet_writer_properties_get_data_page_size(GParquetWriterProperties *properti return parquet_properties->data_pagesize(); } -typedef struct GParquetArrowFileWriterPrivate_ +struct GParquetArrowFileWriterPrivate { parquet::arrow::FileWriter *arrow_file_writer; -} GParquetArrowFileWriterPrivate; +}; enum { - PROP_0, - PROP_ARROW_FILE_WRITER + PROP_ARROW_FILE_WRITER = 1, }; G_DEFINE_TYPE_WITH_PRIVATE(GParquetArrowFileWriter, @@ -496,6 +495,58 @@ gparquet_arrow_file_writer_new_path(GArrowSchema *schema, } } +/** + * gparquet_arrow_file_writer_get_schema: + * @writer: A #GParquetArrowFileWriter. + * + * Returns: (transfer full): The schema to be written to. + * + * Since: 18.0.0 + */ +GArrowSchema * +gparquet_arrow_file_writer_get_schema(GParquetArrowFileWriter *writer) +{ + auto parquet_arrow_file_writer = gparquet_arrow_file_writer_get_raw(writer); + auto arrow_schema = parquet_arrow_file_writer->schema(); + return garrow_schema_new_raw(&arrow_schema); +} + +/** + * gparquet_arrow_file_writer_write_record_batch: + * @writer: A #GParquetArrowFileWriter. + * @record_batch: A record batch to be written. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Write a record batch into the buffered row group. + * + * Multiple record batches can be written into the same row group + * through this function. 
+ * + * gparquet_writer_properties_get_max_row_group_length() is respected + * and a new row group will be created if the current row group + * exceeds the limit. + * + * Record batches get flushed to the output stream once + * gparquet_file_writer_new_buffered_row_group() or + * gparquet_file_writer_close() is called. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 18.0.0 + */ +gboolean +gparquet_arrow_file_writer_write_record_batch(GParquetArrowFileWriter *writer, + GArrowRecordBatch *record_batch, + GError **error) +{ + auto parquet_arrow_file_writer = gparquet_arrow_file_writer_get_raw(writer); + auto arrow_record_batch = garrow_record_batch_get_raw(record_batch).get(); + auto status = parquet_arrow_file_writer->WriteRecordBatch(*arrow_record_batch); + return garrow_error_check(error, + status, + "[parquet][arrow][file-writer][write-record-batch]"); +} + /** * gparquet_arrow_file_writer_write_table: * @writer: A #GParquetArrowFileWriter. @@ -510,13 +561,82 @@ gparquet_arrow_file_writer_new_path(GArrowSchema *schema, gboolean gparquet_arrow_file_writer_write_table(GParquetArrowFileWriter *writer, GArrowTable *table, - guint64 chunk_size, + gsize chunk_size, GError **error) { auto parquet_arrow_file_writer = gparquet_arrow_file_writer_get_raw(writer); auto arrow_table = garrow_table_get_raw(table).get(); - auto status = parquet_arrow_file_writer->WriteTable(*arrow_table, chunk_size); - return garrow_error_check(error, status, "[parquet][arrow][file-writer][write-table]"); + return garrow::check(error, + parquet_arrow_file_writer->WriteTable(*arrow_table, chunk_size), + "[parquet][arrow][file-writer][write-table]"); +} + +/** + * gparquet_arrow_file_writer_new_row_group: + * @writer: A #GParquetArrowFileWriter. + * @chunk_size: The max number of rows in a row group. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Start a new row group. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 18.0.0 + */ +gboolean +gparquet_arrow_file_writer_new_row_group(GParquetArrowFileWriter *writer, + gsize chunk_size, + GError **error) +{ + auto parquet_arrow_file_writer = gparquet_arrow_file_writer_get_raw(writer); + return garrow::check(error, + parquet_arrow_file_writer->NewRowGroup(chunk_size), + "[parquet][arrow][file-writer][new-row-group]"); +} + +/** + * gparquet_arrow_file_writer_new_buffered_row_group: + * @writer: A #GParquetArrowFileWriter. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Start a new buffered row group. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 18.0.0 + */ +gboolean +gparquet_arrow_file_writer_new_buffered_row_group(GParquetArrowFileWriter *writer, + GError **error) +{ + auto parquet_arrow_file_writer = gparquet_arrow_file_writer_get_raw(writer); + return garrow::check(error, + parquet_arrow_file_writer->NewBufferedRowGroup(), + "[parquet][arrow][file-writer][new-buffered-row-group]"); +} + +/** + * gparquet_arrow_file_writer_write_chunked_array: + * @writer: A #GParquetArrowFileWriter. + * @chunked_array: A #GArrowChunkedArray to be written. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Start a chunked array as a column chunk. + * + * Returns: %TRUE on success, %FALSE if there was an error. 
+ * + * Since: 18.0.0 + */ +gboolean +gparquet_arrow_file_writer_write_chunked_array(GParquetArrowFileWriter *writer, + GArrowChunkedArray *chunked_array, + GError **error) +{ + auto parquet_arrow_file_writer = gparquet_arrow_file_writer_get_raw(writer); + auto arrow_chunked_array = garrow_chunked_array_get_raw(chunked_array); + return garrow::check(error, + parquet_arrow_file_writer->WriteColumnChunk(arrow_chunked_array), + "[parquet][arrow][file-writer][write-chunked-array]"); } /** diff --git a/c_glib/parquet-glib/arrow-file-writer.h b/c_glib/parquet-glib/arrow-file-writer.h index 71cbfa195e842..2c82f7c1f87de 100644 --- a/c_glib/parquet-glib/arrow-file-writer.h +++ b/c_glib/parquet-glib/arrow-file-writer.h @@ -116,13 +116,40 @@ gparquet_arrow_file_writer_new_path(GArrowSchema *schema, GParquetWriterProperties *writer_properties, GError **error); +GPARQUET_AVAILABLE_IN_18_0 +GArrowSchema * +gparquet_arrow_file_writer_get_schema(GParquetArrowFileWriter *writer); + +GPARQUET_AVAILABLE_IN_18_0 +gboolean +gparquet_arrow_file_writer_write_record_batch(GParquetArrowFileWriter *writer, + GArrowRecordBatch *record_batch, + GError **error); + GPARQUET_AVAILABLE_IN_0_11 gboolean gparquet_arrow_file_writer_write_table(GParquetArrowFileWriter *writer, GArrowTable *table, - guint64 chunk_size, + gsize chunk_size, GError **error); +GPARQUET_AVAILABLE_IN_18_0 +gboolean +gparquet_arrow_file_writer_new_row_group(GParquetArrowFileWriter *writer, + gsize chunk_size, + GError **error); + +GPARQUET_AVAILABLE_IN_18_0 +gboolean +gparquet_arrow_file_writer_new_buffered_row_group(GParquetArrowFileWriter *writer, + GError **error); + +GPARQUET_AVAILABLE_IN_18_0 +gboolean +gparquet_arrow_file_writer_write_chunked_array(GParquetArrowFileWriter *writer, + GArrowChunkedArray *chunked_array, + GError **error); + GPARQUET_AVAILABLE_IN_0_11 gboolean gparquet_arrow_file_writer_close(GParquetArrowFileWriter *writer, GError **error); diff --git a/c_glib/test/flight/test-call-options.rb b/c_glib/test/flight/test-call-options.rb index bf4dd6ae81a84..2574a9f7cbdbf 100644 --- a/c_glib/test/flight/test-call-options.rb +++ b/c_glib/test/flight/test-call-options.rb @@ -44,4 +44,10 @@ def test_clear_headers @options.clear_headers assert_equal([], collect_headers) end + + def test_timeout + assert_in_delta(-1, @options.timeout) + @options.timeout = 10.1 + assert_in_delta(10.1, @options.timeout) + end end diff --git a/c_glib/test/flight/test-client.rb b/c_glib/test/flight/test-client.rb index 7eb093d3cab80..f1e3f31234ab4 100644 --- a/c_glib/test/flight/test-client.rb +++ b/c_glib/test/flight/test-client.rb @@ -84,4 +84,37 @@ def test_error end end end + + sub_test_case("#do_put") do + def test_success + client = ArrowFlight::Client.new(@location) + generator = Helper::FlightInfoGenerator.new + descriptor = generator.page_view_descriptor + table = generator.page_view_table + result = client.do_put(descriptor, table.schema) + writer = result.writer + writer.write_table(table) + writer.done_writing + reader = result.reader + metadata = reader.read + writer.close + assert_equal(["done", table], + [metadata.data.to_s, @server.uploaded_table]) + end + + def test_error + client = ArrowFlight::Client.new(@location) + generator = Helper::FlightInfoGenerator.new + descriptor = generator.page_view_descriptor + table = generator.page_view_table + result = client.do_put(descriptor, table.schema) + assert_raise(Arrow::Error::Invalid) do + writer = result.writer + writer.done_writing + reader = result.reader + reader.read + writer.close + end + 
end + end end diff --git a/c_glib/test/helper/flight-server.rb b/c_glib/test/helper/flight-server.rb index 8c47029d41791..80b8a5c96cf9f 100644 --- a/c_glib/test/helper/flight-server.rb +++ b/c_glib/test/helper/flight-server.rb @@ -34,6 +34,8 @@ def virtual_do_is_valid(context, token) class FlightServer < ArrowFlight::Server type_register + attr_reader :uploaded_table + private def virtual_do_list_flights(context, criteria) generator = FlightInfoGenerator.new @@ -54,5 +56,14 @@ def virtual_do_do_get(context, ticket) reader = Arrow::TableBatchReader.new(table) ArrowFlight::RecordBatchStream.new(reader) end + + def virtual_do_do_put(context, reader, writer) + @uploaded_table = reader.read_all + writer.write(Arrow::Buffer.new("done")) + if @uploaded_table.n_rows.zero? + raise Arrow::Error::Invalid.new("empty table") + end + true + end end end diff --git a/c_glib/test/parquet/test-arrow-file-writer.rb b/c_glib/test/parquet/test-arrow-file-writer.rb index f899e7273b2a2..d8344bf1c50b0 100644 --- a/c_glib/test/parquet/test-arrow-file-writer.rb +++ b/c_glib/test/parquet/test-arrow-file-writer.rb @@ -26,7 +26,42 @@ def setup end end - def test_write + def test_schema + schema = build_schema("enabled" => :boolean) + writer = Parquet::ArrowFileWriter.new(schema, @file.path) + assert_equal(schema, writer.schema) + writer.close + end + + def test_write_record_batch + enabled_values = [true, nil, false, true] + record_batch = + build_record_batch("enabled" => build_boolean_array(enabled_values)) + + writer = Parquet::ArrowFileWriter.new(record_batch.schema, @file.path) + writer.write_record_batch(record_batch) + writer.new_buffered_row_group + writer.write_record_batch(record_batch) + writer.close + + reader = Parquet::ArrowFileReader.new(@file.path) + begin + reader.use_threads = true + assert_equal([ + 2, + Arrow::Table.new(record_batch.schema, + [record_batch, record_batch]), + ], + [ + reader.n_row_groups, + reader.read_table, + ]) + ensure + reader.unref + end + end + + def test_write_table enabled_values = [true, nil, false, true] table = build_table("enabled" => build_boolean_array(enabled_values)) chunk_size = 2 @@ -40,11 +75,41 @@ def test_write reader.use_threads = true assert_equal([ enabled_values.length / chunk_size, - true, + table, + ], + [ + reader.n_row_groups, + reader.read_table, + ]) + ensure + reader.unref + end + end + + def test_write_chunked_array + schema = build_schema("enabled" => :boolean) + writer = Parquet::ArrowFileWriter.new(schema, @file.path) + writer.new_row_group(2) + chunked_array = Arrow::ChunkedArray.new([build_boolean_array([true, nil])]) + writer.write_chunked_array(chunked_array) + writer.new_row_group(1) + chunked_array = Arrow::ChunkedArray.new([build_boolean_array([false])]) + writer.write_chunked_array(chunked_array) + writer.close + + reader = Parquet::ArrowFileReader.new(@file.path) + begin + reader.use_threads = true + assert_equal([ + 2, + build_table("enabled" => [ + build_boolean_array([true, nil]), + build_boolean_array([false]), + ]), ], [ reader.n_row_groups, - table.equal_metadata(reader.read_table, false), + reader.read_table, ]) ensure reader.unref diff --git a/c_glib/test/parquet/test-column-chunk-metadata.rb b/c_glib/test/parquet/test-column-chunk-metadata.rb index f0012f0124577..4612e5bf0cc59 100644 --- a/c_glib/test/parquet/test-column-chunk-metadata.rb +++ b/c_glib/test/parquet/test-column-chunk-metadata.rb @@ -77,7 +77,7 @@ def setup test("#file_offset") do assert do - @metadata.file_offset > 0 + @metadata.file_offset == 0 end end diff --git 
a/c_glib/test/test-file-writer.rb b/c_glib/test/test-file-writer.rb index 5f9c3c4e19aa9..06c9dfa25c7fc 100644 --- a/c_glib/test/test-file-writer.rb +++ b/c_glib/test/test-file-writer.rb @@ -34,6 +34,9 @@ def test_write_record_batch file_writer.write_record_batch(record_batch) ensure file_writer.close + assert do + file_writer.closed? + end end ensure output.close @@ -68,6 +71,9 @@ def test_write_table file_writer.write_table(table) ensure file_writer.close + assert do + file_writer.closed? + end end ensure output.close diff --git a/c_glib/test/test-stream-decoder.rb b/c_glib/test/test-stream-decoder.rb new file mode 100644 index 0000000000000..108e687e3aa6b --- /dev/null +++ b/c_glib/test/test-stream-decoder.rb @@ -0,0 +1,126 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestStreamDecoder < Test::Unit::TestCase + include Helper::Buildable + + class Listener < Arrow::StreamListener + type_register + + attr_reader :events + def initialize + super + @events = [] + end + + private + def virtual_do_on_eos + @events << [:eos] + true + end + + def virtual_do_on_record_batch_decoded(record_batch, metadata) + @events << [:record_batch_decoded, record_batch, metadata] + true + end + + def virtual_do_on_schema_decoded(schema, filtered_schema) + @events << [:schema_decoded, schema, filtered_schema] + true + end + end + + def setup + columns = { + "enabled": build_boolean_array([true, false, nil, true]), + } + @record_batch = build_record_batch(columns) + @schema = @record_batch.schema + + @buffer = Arrow::ResizableBuffer.new(0) + output = Arrow::BufferOutputStream.new(@buffer) + stream_writer = Arrow::RecordBatchStreamWriter.new(output, @schema) + stream_writer.write_record_batch(@record_batch) + stream_writer.close + output.close + + @listener = Listener.new + @decoder = Arrow::StreamDecoder.new(@listener) + end + + def test_listener + assert_equal(@listener, @decoder.listener) + end + + def test_consume_bytes + @buffer.data.to_s.each_byte do |byte| + @decoder.consume_bytes(GLib::Bytes.new(byte.chr)) + end + assert_equal([ + [:schema_decoded, @schema, @schema], + [:record_batch_decoded, @record_batch, nil], + [:eos], + ], + @listener.events) + end + + def test_consume_buffer + @buffer.data.to_s.each_byte do |byte| + @decoder.consume_buffer(Arrow::Buffer.new(byte.chr)) + end + assert_equal([ + [:schema_decoded, @schema, @schema], + [:record_batch_decoded, @record_batch, nil], + [:eos], + ], + @listener.events) + end + + def test_reset + @decoder.consume_bytes(@buffer.data.to_s[0, 10]) + @decoder.reset + @decoder.consume_bytes(@buffer.data) + assert_equal([ + [:schema_decoded, @schema, @schema], + [:record_batch_decoded, @record_batch, nil], + [:eos], + ], + @listener.events) + end + + def test_schema + assert_nil(@decoder.schema) + 
@decoder.consume_bytes(@buffer.data) + assert_equal(@schema, @decoder.schema) + end + + def test_next_required_size + data = @buffer.data.to_s + loop do + next_required_size = @decoder.next_required_size + break if next_required_size.zero? + @decoder.consume_bytes(data[0, next_required_size]) + data = data[next_required_size..-1] + end + assert_equal([ + [:schema_decoded, @schema, @schema], + [:record_batch_decoded, @record_batch, nil], + [:eos], + ], + @listener.events) + end +end diff --git a/c_glib/test/test-stream-writer.rb b/c_glib/test/test-stream-writer.rb index 32754e20838b4..261732ae91e15 100644 --- a/c_glib/test/test-stream-writer.rb +++ b/c_glib/test/test-stream-writer.rb @@ -35,6 +35,9 @@ def test_write_record_batch stream_writer.write_record_batch(record_batch) ensure stream_writer.close + assert do + stream_writer.closed? + end end ensure output.close diff --git a/c_glib/tool/generate-version-header.py b/c_glib/tool/generate-version-header.py index 7422432251ff1..ba8cb03d15a3e 100755 --- a/c_glib/tool/generate-version-header.py +++ b/c_glib/tool/generate-version-header.py @@ -140,6 +140,7 @@ def generate_availability_macros(library: str) -> str: ALL_VERSIONS = [ + (18, 0), (17, 0), (16, 0), (15, 0), diff --git a/c_glib/vcpkg.json b/c_glib/vcpkg.json index e88d2b8fe30d5..3941edbfec527 100644 --- a/c_glib/vcpkg.json +++ b/c_glib/vcpkg.json @@ -1,6 +1,6 @@ { "name": "arrow-glib", - "version-string": "17.0.0-SNAPSHOT", + "version-string": "18.0.0-SNAPSHOT", "dependencies": [ "glib", "gobject-introspection", diff --git a/ci/appveyor-cpp-build.bat b/ci/appveyor-cpp-build.bat index f688fbb63a9ad..08a052e82f24d 100644 --- a/ci/appveyor-cpp-build.bat +++ b/ci/appveyor-cpp-build.bat @@ -46,7 +46,9 @@ set ARROW_CMAKE_ARGS=-DARROW_DEPENDENCY_SOURCE=CONDA -DARROW_WITH_BZ2=ON set ARROW_CXXFLAGS=/WX /MP @rem Install GCS testbench +set PIPX_BIN_DIR=C:\Windows\ call %CD%\ci\scripts\install_gcs_testbench.bat +storage-testbench -h || exit /B @rem @rem Build and test Arrow C++ libraries (including Parquet) diff --git a/ci/docker/conda-cpp.dockerfile b/ci/docker/conda-cpp.dockerfile index dff1f2224809a..f0084894e19dc 100644 --- a/ci/docker/conda-cpp.dockerfile +++ b/ci/docker/conda-cpp.dockerfile @@ -42,17 +42,19 @@ RUN mamba install -q -y \ valgrind && \ mamba clean --all +# We want to install the GCS testbench using the Conda base environment's Python, +# because the test environment's Python may later change. +ENV PIPX_BASE_PYTHON=/opt/conda/bin/python3 +COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts +RUN /arrow/ci/scripts/install_gcs_testbench.sh default + # Ensure npm, node and azurite are on path. npm and node are required to install azurite, which will then need to -# be on the path for the tests to run. +# be on the path for the tests to run. ENV PATH=/opt/conda/envs/arrow/bin:$PATH COPY ci/scripts/install_azurite.sh /arrow/ci/scripts/ RUN /arrow/ci/scripts/install_azurite.sh -# We want to install the GCS testbench using the same Python binary that the Conda code will use. 
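As a usage illustration of the new GParquetArrowFileWriter entry points added above (outside the diff itself), here is a minimal Ruby sketch of how they surface through the red-parquet bindings, mirroring the test-arrow-file-writer.rb cases in this patch. The output path and the sample column are placeholders; Arrow::Table.new with a column Hash and Arrow::TableBatchReader are ordinary red-arrow usage, not APIs introduced here.

require "parquet" # red-parquet, loads red-arrow as well

# Placeholder data: build a one-column table and take its single record batch.
table = Arrow::Table.new("enabled" => [true, nil, false, true])
record_batch = Arrow::TableBatchReader.new(table).read_next

writer = Parquet::ArrowFileWriter.new(record_batch.schema, "/tmp/enabled.parquet")
writer.schema # => the schema passed at construction time (new accessor)

# write_record_batch appends to the current buffered row group;
# new_buffered_row_group starts the next one, so two row groups are written.
writer.write_record_batch(record_batch)
writer.new_buffered_row_group
writer.write_record_batch(record_batch)
writer.close

reader = Parquet::ArrowFileReader.new("/tmp/enabled.parquet")
reader.n_row_groups # => 2

For column-at-a-time writes, new_row_group plus write_chunked_array plays the same role for unbuffered row groups, as exercised by test_write_chunked_array above.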
-COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts -RUN /arrow/ci/scripts/install_gcs_testbench.sh default - COPY ci/scripts/install_sccache.sh /arrow/ci/scripts/ RUN /arrow/ci/scripts/install_sccache.sh unknown-linux-musl /usr/local/bin diff --git a/ci/docker/conda-integration.dockerfile b/ci/docker/conda-integration.dockerfile index 30b9cd5199fab..bc268e484d019 100644 --- a/ci/docker/conda-integration.dockerfile +++ b/ci/docker/conda-integration.dockerfile @@ -23,8 +23,7 @@ ARG arch=amd64 ARG maven=3.8.7 ARG node=16 ARG yarn=1.22 -ARG jdk=8 -ARG go=1.21.8 +ARG jdk=11 # Install Archery and integration dependencies COPY ci/conda_env_archery.txt /arrow/ci/ @@ -44,15 +43,30 @@ RUN mamba install -q -y \ # Install Rust with only the needed components # (rustfmt is needed for tonic-build to compile the protobuf definitions) +# GH-41637: Version pinned at 1.77 because the glibc for conda-cpp is currently too old RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- --profile=minimal -y && \ - $HOME/.cargo/bin/rustup toolchain install stable && \ + $HOME/.cargo/bin/rustup override set 1.77 && \ + $HOME/.cargo/bin/rustup toolchain install 1.77 && \ $HOME/.cargo/bin/rustup component add rustfmt ENV GOROOT=/opt/go \ GOBIN=/opt/go/bin \ GOPATH=/go \ PATH=/opt/go/bin:$PATH -RUN wget -nv -O - https://dl.google.com/go/go${go}.linux-${arch}.tar.gz | tar -xzf - -C /opt +# Use always latest go +RUN wget -nv -O - https://dl.google.com/go/go$( \ + curl \ + --fail \ + --location \ + --show-error \ + --silent \ + https://api.github.com/repos/golang/go/git/matching-refs/tags/go | \ + grep -o '"ref": "refs/tags/go.*"' | \ + tail -n 1 | \ + sed \ + -e 's,^"ref": "refs/tags/go,,g' \ + -e 's/"$//g' \ + ).linux-${arch}.tar.gz | tar -xzf - -C /opt ENV DOTNET_ROOT=/opt/dotnet \ PATH=/opt/dotnet:$PATH diff --git a/ci/docker/debian-12-go.dockerfile b/ci/docker/conda-python-cpython-debug.dockerfile similarity index 67% rename from ci/docker/debian-12-go.dockerfile rename to ci/docker/conda-python-cpython-debug.dockerfile index c958e6bdee211..36ba7865a888c 100644 --- a/ci/docker/debian-12-go.dockerfile +++ b/ci/docker/conda-python-cpython-debug.dockerfile @@ -15,15 +15,14 @@ # specific language governing permissions and limitations # under the License. 
-ARG arch=amd64 -ARG go=1.21 -ARG staticcheck=v0.4.7 -FROM ${arch}/golang:${go}-bookworm +ARG repo +ARG arch +ARG python=3.9 +FROM ${repo}:${arch}-conda-python-${python} -# FROM collects all the args, get back the staticcheck version arg -ARG staticcheck -RUN GO111MODULE=on go install honnef.co/go/tools/cmd/staticcheck@${staticcheck} - -# Copy the go.mod and go.sum over and pre-download all the dependencies -COPY go/ /arrow/go -RUN cd /arrow/go && go mod download +# (Docker oddity: ARG needs to be repeated after FROM) +ARG python=3.9 +RUN mamba install -y "conda-forge/label/python_debug::python=${python}[build=*_cpython]" && \ + mamba clean --all +# Quick check that we do have a debug mode CPython +RUN python -c "import sys; sys.gettotalrefcount()" diff --git a/ci/docker/conda-python-cython2.dockerfile b/ci/docker/conda-python-cython2.dockerfile index d67ef677276c7..859ad868b0c71 100644 --- a/ci/docker/conda-python-cython2.dockerfile +++ b/ci/docker/conda-python-cython2.dockerfile @@ -17,7 +17,7 @@ ARG repo ARG arch -ARG python=3.8 +ARG python=3.9 FROM ${repo}:${arch}-conda-python-${python} RUN mamba install -q -y "cython<3" && \ diff --git a/ci/docker/conda-python-dask.dockerfile b/ci/docker/conda-python-dask.dockerfile index 44840110817e9..2c063b2e643b6 100644 --- a/ci/docker/conda-python-dask.dockerfile +++ b/ci/docker/conda-python-dask.dockerfile @@ -17,7 +17,7 @@ ARG repo ARG arch=amd64 -ARG python=3.8 +ARG python=3.9 FROM ${repo}:${arch}-conda-python-${python} ARG dask=latest diff --git a/ci/docker/conda-python-hdfs.dockerfile b/ci/docker/conda-python-hdfs.dockerfile index fa4fa0d1fb772..4cf35f4b37a56 100644 --- a/ci/docker/conda-python-hdfs.dockerfile +++ b/ci/docker/conda-python-hdfs.dockerfile @@ -17,10 +17,10 @@ ARG repo ARG arch=amd64 -ARG python=3.8 +ARG python=3.9 FROM ${repo}:${arch}-conda-python-${python} -ARG jdk=8 +ARG jdk=11 ARG maven=3.8.7 RUN mamba install -q -y \ maven=${maven} \ diff --git a/ci/docker/conda-python-jpype.dockerfile b/ci/docker/conda-python-jpype.dockerfile index d9b43afdaec9e..c28400f0262da 100644 --- a/ci/docker/conda-python-jpype.dockerfile +++ b/ci/docker/conda-python-jpype.dockerfile @@ -17,7 +17,7 @@ ARG repo ARG arch=amd64 -ARG python=3.8 +ARG python=3.9 FROM ${repo}:${arch}-conda-python-${python} ARG jdk=11 diff --git a/ci/docker/conda-python-pandas.dockerfile b/ci/docker/conda-python-pandas.dockerfile index 83ad52a13d639..9ee62cd282d36 100644 --- a/ci/docker/conda-python-pandas.dockerfile +++ b/ci/docker/conda-python-pandas.dockerfile @@ -17,7 +17,7 @@ ARG repo ARG arch=amd64 -ARG python=3.8 +ARG python=3.9 FROM ${repo}:${arch}-conda-python-${python} ARG pandas=latest diff --git a/ci/docker/conda-python-spark.dockerfile b/ci/docker/conda-python-spark.dockerfile index 866f6f37f8bd9..a8e8250797fa8 100644 --- a/ci/docker/conda-python-spark.dockerfile +++ b/ci/docker/conda-python-spark.dockerfile @@ -17,10 +17,10 @@ ARG repo ARG arch=amd64 -ARG python=3.8 +ARG python=3.9 FROM ${repo}:${arch}-conda-python-${python} -ARG jdk=8 +ARG jdk=11 ARG maven=3.8.7 ARG numpy=latest diff --git a/ci/docker/conda-python.dockerfile b/ci/docker/conda-python.dockerfile index 027fd589cecca..3897a7217d975 100644 --- a/ci/docker/conda-python.dockerfile +++ b/ci/docker/conda-python.dockerfile @@ -20,7 +20,7 @@ ARG arch FROM ${repo}:${arch}-conda-cpp # install python specific packages -ARG python=3.8 +ARG python=3.9 COPY ci/conda_env_python.txt \ /arrow/ci/ # If the Python version being tested is the same as the Python used by the system gdb, @@ -32,11 +32,6 @@ RUN mamba 
install -q -y \ nomkl && \ mamba clean --all -# XXX The GCS testbench was already installed in conda-cpp.dockerfile, -# but we changed the installed Python version above, so we need to reinstall it. -COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts -RUN /arrow/ci/scripts/install_gcs_testbench.sh default - ENV ARROW_ACERO=ON \ ARROW_BUILD_STATIC=OFF \ ARROW_BUILD_TESTS=OFF \ diff --git a/ci/docker/debian-go-cgo.dockerfile b/ci/docker/debian-go-cgo.dockerfile deleted file mode 100644 index a494d1e1564ff..0000000000000 --- a/ci/docker/debian-go-cgo.dockerfile +++ /dev/null @@ -1,32 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -ARG base -FROM ${base} - -ENV DEBIAN_FRONTEND noninteractive - -# install libarrow-dev to link against with CGO -RUN apt-get update -y -q && \ - apt-get install -y -q --no-install-recommends ca-certificates lsb-release wget && \ - wget https://apache.jfrog.io/artifactory/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb && \ - apt-get install -y -q --no-install-recommends ./apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb && \ - apt-get update -y -q && \ - apt-get install -y -q --no-install-recommends \ - cmake \ - libarrow-dev && \ - apt-get clean diff --git a/ci/docker/fedora-39-cpp.dockerfile b/ci/docker/fedora-39-cpp.dockerfile index 33d11823094ce..2ac5afe7b91f6 100644 --- a/ci/docker/fedora-39-cpp.dockerfile +++ b/ci/docker/fedora-39-cpp.dockerfile @@ -34,6 +34,7 @@ RUN dnf update -y && \ curl-devel \ gcc \ gcc-c++ \ + gdb \ gflags-devel \ git \ glog-devel \ diff --git a/ci/docker/java-jni-manylinux-201x.dockerfile b/ci/docker/java-jni-manylinux-201x.dockerfile index 8b73c73c1d240..479f4aa598b18 100644 --- a/ci/docker/java-jni-manylinux-201x.dockerfile +++ b/ci/docker/java-jni-manylinux-201x.dockerfile @@ -33,7 +33,7 @@ RUN vcpkg install \ --x-feature=s3 # Install Java -ARG java=1.8.0 +ARG java=11 ARG maven=3.9.3 RUN yum install -y java-$java-openjdk-devel && \ yum clean all && \ diff --git a/ci/docker/linux-apt-docs.dockerfile b/ci/docker/linux-apt-docs.dockerfile index 1c916840e071b..0804f3543c283 100644 --- a/ci/docker/linux-apt-docs.dockerfile +++ b/ci/docker/linux-apt-docs.dockerfile @@ -19,7 +19,7 @@ ARG base FROM ${base} ARG r=4.4 -ARG jdk=8 +ARG jdk=11 ENV PUPPETEER_EXECUTABLE_PATH=/usr/bin/chromium diff --git a/ci/docker/linux-apt-python-3.dockerfile b/ci/docker/linux-apt-python-3.dockerfile index 2e07c244017a0..e215976d44850 100644 --- a/ci/docker/linux-apt-python-3.dockerfile +++ b/ci/docker/linux-apt-python-3.dockerfile @@ -23,6 +23,7 @@ COPY python/requirements-build.txt \ /arrow/python/ ENV ARROW_PYTHON_VENV /arrow-dev + RUN python3 -m venv ${ARROW_PYTHON_VENV} && \ . 
${ARROW_PYTHON_VENV}/bin/activate && \ pip install -U pip setuptools wheel && \ diff --git a/ci/docker/linux-apt-python-313-freethreading.dockerfile b/ci/docker/linux-apt-python-313-freethreading.dockerfile new file mode 100644 index 0000000000000..f5505e67f00bb --- /dev/null +++ b/ci/docker/linux-apt-python-313-freethreading.dockerfile @@ -0,0 +1,59 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +ARG base +FROM ${base} + +RUN apt-get update -y -q && \ + apt install -y -q --no-install-recommends software-properties-common gpg-agent && \ + add-apt-repository -y ppa:deadsnakes/ppa && \ + apt-get update -y -q && \ + apt install -y -q --no-install-recommends python3.13-dev python3.13-nogil python3.13-venv && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists* + +COPY python/requirements-build.txt \ + python/requirements-test.txt \ + /arrow/python/ + +ENV ARROW_PYTHON_VENV /arrow-dev +RUN python3.13t -m venv ${ARROW_PYTHON_VENV} +RUN ${ARROW_PYTHON_VENV}/bin/python -m pip install -U pip setuptools wheel +RUN ${ARROW_PYTHON_VENV}/bin/python -m pip install \ + --pre \ + --prefer-binary \ + --extra-index-url "https://pypi.anaconda.org/scientific-python-nightly-wheels/simple" \ + -r arrow/python/requirements-build.txt \ + -r arrow/python/requirements-test.txt + +# We want to run the PyArrow test suite with the GIL disabled, but cffi +# (more precisely, the `_cffi_backend` module) currently doesn't declare +# itself safe to run without the GIL. +# Therefore set PYTHON_GIL to 0. 
+ENV ARROW_ACERO=ON \ + ARROW_BUILD_STATIC=OFF \ + ARROW_BUILD_TESTS=OFF \ + ARROW_BUILD_UTILITIES=OFF \ + ARROW_COMPUTE=ON \ + ARROW_CSV=ON \ + ARROW_DATASET=ON \ + ARROW_FILESYSTEM=ON \ + ARROW_GDB=ON \ + ARROW_HDFS=ON \ + ARROW_JSON=ON \ + ARROW_USE_GLOG=OFF \ + PYTHON_GIL=0 diff --git a/ci/docker/linux-apt-r.dockerfile b/ci/docker/linux-apt-r.dockerfile index 630b96e1007b9..4be5adf246b88 100644 --- a/ci/docker/linux-apt-r.dockerfile +++ b/ci/docker/linux-apt-r.dockerfile @@ -58,6 +58,7 @@ RUN apt-get update -y && \ locales \ # Need Python to check py-to-r bridge python3 \ + python3-venv \ python3-pip \ python3-dev && \ locale-gen en_US.UTF-8 && \ @@ -81,15 +82,16 @@ RUN cat /arrow/ci/etc/rprofile >> $(R RHOME)/etc/Rprofile.site # Also ensure parallel compilation of C/C++ code RUN echo "MAKEFLAGS=-j$(R -s -e 'cat(parallel::detectCores())')" >> $(R RHOME)/etc/Renviron.site -# Set up Python 3 and its dependencies -RUN ln -s /usr/bin/python3 /usr/local/bin/python && \ - ln -s /usr/bin/pip3 /usr/local/bin/pip - COPY ci/scripts/r_deps.sh /arrow/ci/scripts/ COPY r/DESCRIPTION /arrow/r/ RUN /arrow/ci/scripts/r_deps.sh /arrow -RUN pip install -U pip setuptools wheel +ENV ARROW_PYTHON_VENV /arrow-dev +COPY python/requirements-build.txt /arrow/python/ +RUN python3 -m venv ${ARROW_PYTHON_VENV} && \ + source ${ARROW_PYTHON_VENV}/bin/activate && \ + pip install -U pip setuptools wheel && \ + pip install -r arrow/python/requirements-build.txt COPY ci/scripts/install_minio.sh /arrow/ci/scripts/ RUN /arrow/ci/scripts/install_minio.sh latest /usr/local @@ -97,9 +99,6 @@ RUN /arrow/ci/scripts/install_minio.sh latest /usr/local COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/ RUN /arrow/ci/scripts/install_gcs_testbench.sh default -COPY python/requirements-build.txt /arrow/python/ -RUN pip install -r arrow/python/requirements-build.txt - ENV \ ARROW_ACERO=ON \ ARROW_BUILD_STATIC=OFF \ diff --git a/ci/docker/debian-12-go-cgo-python.dockerfile b/ci/docker/python-free-threaded-wheel-manylinux-test-imports.dockerfile similarity index 67% rename from ci/docker/debian-12-go-cgo-python.dockerfile rename to ci/docker/python-free-threaded-wheel-manylinux-test-imports.dockerfile index a24955f76e666..09530560e4f20 100644 --- a/ci/docker/debian-12-go-cgo-python.dockerfile +++ b/ci/docker/python-free-threaded-wheel-manylinux-test-imports.dockerfile @@ -18,17 +18,16 @@ ARG base FROM ${base} -ENV DEBIAN_FRONTEND noninteractive - -# Install python3 and pip so we can install pyarrow to test the C data interface. RUN apt-get update -y -q && \ - apt-get install -y -q --no-install-recommends \ - python3 \ - python3-pip \ - python3-venv && \ - apt-get clean + apt install -y -q --no-install-recommends software-properties-common gpg-agent && \ + add-apt-repository -y ppa:deadsnakes/ppa && \ + apt-get update -y -q && \ + apt install -y -q --no-install-recommends python3.13-dev python3.13-nogil python3.13-venv && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists* ENV ARROW_PYTHON_VENV /arrow-dev -RUN python3 -m venv ${ARROW_PYTHON_VENV} && \ - . 
${ARROW_PYTHON_VENV}/bin/activate && \ - pip install pyarrow cffi --only-binary pyarrow +RUN python3.13t -m venv ${ARROW_PYTHON_VENV} + +ENV PYTHON_GIL 0 +ENV PATH "${ARROW_PYTHON_VENV}/bin:${PATH}" diff --git a/ci/docker/python-free-threaded-wheel-manylinux-test-unittests.dockerfile b/ci/docker/python-free-threaded-wheel-manylinux-test-unittests.dockerfile new file mode 100644 index 0000000000000..13b3bc140a9a8 --- /dev/null +++ b/ci/docker/python-free-threaded-wheel-manylinux-test-unittests.dockerfile @@ -0,0 +1,51 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +ARG base +FROM ${base} + +ENV DEBIAN_FRONTEND=noninteractive + +RUN apt-get update -y -q && \ + apt install -y -q --no-install-recommends software-properties-common gpg-agent && \ + add-apt-repository -y ppa:deadsnakes/ppa && \ + apt-get update -y -q && \ + apt install -y -q --no-install-recommends \ + build-essential \ + libffi-dev \ + python3.13-dev \ + python3.13-nogil \ + python3.13-venv && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists* + +ENV ARROW_PYTHON_VENV /arrow-dev +RUN python3.13t -m venv ${ARROW_PYTHON_VENV} + +ENV PYTHON_GIL 0 +ENV PATH "${ARROW_PYTHON_VENV}/bin:${PATH}" + +# pandas doesn't provide wheels for aarch64 yet, so we have to install nightly Cython +# along with the rest of pandas' build dependencies and disable build isolation +COPY python/requirements-wheel-test.txt /arrow/python/ +RUN python -m pip install \ + --pre \ + --prefer-binary \ + --extra-index-url "https://pypi.anaconda.org/scientific-python-nightly-wheels/simple" \ + Cython numpy +RUN python -m pip install "meson-python==0.13.1" "meson==1.2.1" wheel "versioneer[toml]" ninja +RUN python -m pip install --no-build-isolation -r /arrow/python/requirements-wheel-test.txt diff --git a/ci/docker/python-wheel-manylinux-test.dockerfile b/ci/docker/python-wheel-manylinux-test.dockerfile index cdd0ae3ced756..09883f9780a36 100644 --- a/ci/docker/python-wheel-manylinux-test.dockerfile +++ b/ci/docker/python-wheel-manylinux-test.dockerfile @@ -16,15 +16,22 @@ # under the License. 
ARG arch -ARG python -FROM ${arch}/python:${python} - -# RUN pip install --upgrade pip +ARG python_image_tag +FROM ${arch}/python:${python_image_tag} # pandas doesn't provide wheel for aarch64 yet, so cache the compiled # test dependencies in a docker image COPY python/requirements-wheel-test.txt /arrow/python/ RUN pip install -r /arrow/python/requirements-wheel-test.txt +# Install the GCS testbench with the system Python +RUN apt-get update -y -q && \ + apt-get install -y -q \ + build-essential \ + python3-dev && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists* + COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/ -RUN PYTHON=python /arrow/ci/scripts/install_gcs_testbench.sh default +ENV PIPX_PYTHON=/usr/bin/python3 PIPX_PIP_ARGS=--prefer-binary +RUN /arrow/ci/scripts/install_gcs_testbench.sh default diff --git a/ci/docker/python-wheel-manylinux.dockerfile b/ci/docker/python-wheel-manylinux.dockerfile index cb39667af1e10..d22a70a2d777b 100644 --- a/ci/docker/python-wheel-manylinux.dockerfile +++ b/ci/docker/python-wheel-manylinux.dockerfile @@ -47,7 +47,7 @@ RUN dnf install -y git flex curl autoconf zip perl-IPC-Cmd wget # on manylinux_2_28, no system python is installed. # We therefore override the PATH with Python 3.8 in /opt/python # so that we have a consistent Python version across base images. -ENV CPYTHON_VERSION=cp38 +ENV CPYTHON_VERSION=cp39 ENV PATH=/opt/python/${CPYTHON_VERSION}-${CPYTHON_VERSION}/bin:${PATH} # Install CMake @@ -100,10 +100,15 @@ RUN vcpkg install \ --x-feature=parquet \ --x-feature=s3 +# Make sure auditwheel is up-to-date +RUN pipx upgrade auditwheel + # Configure Python for applications running in the bash shell of this Dockerfile -ARG python=3.8 +ARG python=3.9 +ARG python_abi_tag=cp39 ENV PYTHON_VERSION=${python} -RUN PYTHON_ROOT=$(find /opt/python -name cp${PYTHON_VERSION/./}-*) && \ +ENV PYTHON_ABI_TAG=${python_abi_tag} +RUN PYTHON_ROOT=$(find /opt/python -name cp${PYTHON_VERSION/./}-${PYTHON_ABI_TAG}) && \ echo "export PATH=$PYTHON_ROOT/bin:\$PATH" >> /etc/profile.d/python.sh SHELL ["/bin/bash", "-i", "-c"] diff --git a/ci/docker/python-wheel-windows-test-vs2019.dockerfile b/ci/docker/python-wheel-windows-test-vs2019.dockerfile index 32bbb55e82689..8c17ebfa2fe0a 100644 --- a/ci/docker/python-wheel-windows-test-vs2019.dockerfile +++ b/ci/docker/python-wheel-windows-test-vs2019.dockerfile @@ -27,23 +27,37 @@ FROM abrarov/msvc-2019:2.11.0 # Add unix tools to path RUN setx path "%path%;C:\Program Files\Git\usr\bin" -# Remove previous installations of python from the base image +# 1. Remove previous installations of python from the base image # NOTE: a more recent base image (tried with 2.12.1) comes with python 3.9.7 # and the msi installers are failing to remove pip and tcl/tk "products" making # the subsequent choco python installation step failing for installing python # version 3.9.* due to existing python version +# 2. Install Minio for S3 testing. 
RUN wmic product where "name like 'python%%'" call uninstall /nointeractive && \ - rm -rf Python* + rm -rf Python* && \ + curl https://dl.min.io/server/minio/release/windows-amd64/archive/minio.RELEASE.2022-05-26T05-48-41Z \ + --output "C:\Windows\Minio.exe" -# Define the full version number otherwise choco falls back to patch number 0 (3.8 => 3.8.0) -ARG python=3.8 -RUN (if "%python%"=="3.8" setx PYTHON_VERSION "3.8.10" && setx PATH "%PATH%;C:\Python38;C:\Python38\Scripts") & \ - (if "%python%"=="3.9" setx PYTHON_VERSION "3.9.13" && setx PATH "%PATH%;C:\Python39;C:\Python39\Scripts") & \ - (if "%python%"=="3.10" setx PYTHON_VERSION "3.10.11" && setx PATH "%PATH%;C:\Python310;C:\Python310\Scripts") & \ - (if "%python%"=="3.11" setx PYTHON_VERSION "3.11.5" && setx PATH "%PATH%;C:\Python311;C:\Python311\Scripts") & \ - (if "%python%"=="3.12" setx PYTHON_VERSION "3.12.0" && setx PATH "%PATH%;C:\Python312;C:\Python312\Scripts") +# Install the GCS testbench using a well-known Python version. +# NOTE: cannot use pipx's `--fetch-missing-python` because of +# https://github.com/pypa/pipx/issues/1521, therefore download Python ourselves. +RUN choco install -r -y --pre --no-progress python --version=3.11.9 +ENV PIPX_BIN_DIR=C:\\Windows\\ +ENV PIPX_PYTHON="C:\Python311\python.exe" +COPY ci/scripts/install_gcs_testbench.bat C:/arrow/ci/scripts/ +RUN call "C:\arrow\ci\scripts\install_gcs_testbench.bat" && \ + storage-testbench -h + +# Define the full version number otherwise choco falls back to patch number 0 (3.9 => 3.9.0) +ARG python=3.9 +RUN (if "%python%"=="3.9" setx PYTHON_VERSION "3.9.13") & \ + (if "%python%"=="3.10" setx PYTHON_VERSION "3.10.11") & \ + (if "%python%"=="3.11" setx PYTHON_VERSION "3.11.9") & \ + (if "%python%"=="3.12" setx PYTHON_VERSION "3.12.5") & \ + (if "%python%"=="3.13" setx PYTHON_VERSION "3.13.0-rc1") # Install archiver to extract xz archives -RUN choco install -r -y --no-progress python --version=%PYTHON_VERSION% & \ - python -m pip install --no-cache-dir -U pip setuptools & \ +RUN choco install -r -y --pre --no-progress --force python --version=%PYTHON_VERSION% && \ choco install --no-progress -r -y archiver + +ENV PYTHON=$python diff --git a/ci/docker/python-wheel-windows-vs2019.dockerfile b/ci/docker/python-wheel-windows-vs2019.dockerfile index ff42de939d91f..f9d31eb5771ef 100644 --- a/ci/docker/python-wheel-windows-vs2019.dockerfile +++ b/ci/docker/python-wheel-windows-vs2019.dockerfile @@ -78,14 +78,14 @@ RUN vcpkg install \ RUN wmic product where "name like 'python%%'" call uninstall /nointeractive && \ rm -rf Python* -# Define the full version number otherwise choco falls back to patch number 0 (3.8 => 3.8.0) -ARG python=3.8 -RUN (if "%python%"=="3.8" setx PYTHON_VERSION "3.8.10" && setx PATH "%PATH%;C:\Python38;C:\Python38\Scripts") & \ - (if "%python%"=="3.9" setx PYTHON_VERSION "3.9.13" && setx PATH "%PATH%;C:\Python39;C:\Python39\Scripts") & \ +# Define the full version number otherwise choco falls back to patch number 0 (3.9 => 3.9.0) +ARG python=3.9 +RUN (if "%python%"=="3.9" setx PYTHON_VERSION "3.9.13" && setx PATH "%PATH%;C:\Python39;C:\Python39\Scripts") & \ (if "%python%"=="3.10" setx PYTHON_VERSION "3.10.11" && setx PATH "%PATH%;C:\Python310;C:\Python310\Scripts") & \ - (if "%python%"=="3.11" setx PYTHON_VERSION "3.11.5" && setx PATH "%PATH%;C:\Python311;C:\Python311\Scripts") & \ - (if "%python%"=="3.12" setx PYTHON_VERSION "3.12.0" && setx PATH "%PATH%;C:\Python312;C:\Python312\Scripts") -RUN choco install -r -y --no-progress python 
--version=%PYTHON_VERSION% + (if "%python%"=="3.11" setx PYTHON_VERSION "3.11.9" && setx PATH "%PATH%;C:\Python311;C:\Python311\Scripts") & \ + (if "%python%"=="3.12" setx PYTHON_VERSION "3.12.5" && setx PATH "%PATH%;C:\Python312;C:\Python312\Scripts") & \ + (if "%python%"=="3.13" setx PYTHON_VERSION "3.13.0-rc1" && setx PATH "%PATH%;C:\Python313;C:\Python313\Scripts") +RUN choco install -r -y --pre --no-progress python --version=%PYTHON_VERSION% RUN python -m pip install -U pip setuptools COPY python/requirements-wheel-build.txt arrow/python/ diff --git a/ci/docker/ubuntu-20.04-cpp-minimal.dockerfile b/ci/docker/ubuntu-20.04-cpp-minimal.dockerfile index e17c0306f115d..1b342df596c9d 100644 --- a/ci/docker/ubuntu-20.04-cpp-minimal.dockerfile +++ b/ci/docker/ubuntu-20.04-cpp-minimal.dockerfile @@ -29,10 +29,12 @@ RUN apt-get update -y -q && \ ccache \ cmake \ curl \ + gdb \ git \ libssl-dev \ libcurl4-openssl-dev \ python3-pip \ + python3-venv \ tzdata \ wget && \ apt-get clean && \ diff --git a/ci/docker/ubuntu-22.04-cpp-minimal.dockerfile b/ci/docker/ubuntu-22.04-cpp-minimal.dockerfile index 341d8a87e8661..ce31c457e909e 100644 --- a/ci/docker/ubuntu-22.04-cpp-minimal.dockerfile +++ b/ci/docker/ubuntu-22.04-cpp-minimal.dockerfile @@ -29,10 +29,12 @@ RUN apt-get update -y -q && \ ccache \ cmake \ curl \ + gdb \ git \ libssl-dev \ libcurl4-openssl-dev \ python3-pip \ + python3-venv \ tzdata \ wget && \ apt-get clean && \ diff --git a/ci/docker/ubuntu-24.04-cpp-minimal.dockerfile b/ci/docker/ubuntu-24.04-cpp-minimal.dockerfile new file mode 100644 index 0000000000000..a1fd178a2c754 --- /dev/null +++ b/ci/docker/ubuntu-24.04-cpp-minimal.dockerfile @@ -0,0 +1,106 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +ARG base=amd64/ubuntu:24.04 +FROM ${base} + +SHELL ["/bin/bash", "-o", "pipefail", "-c"] + +RUN echo "debconf debconf/frontend select Noninteractive" | \ + debconf-set-selections + +RUN apt-get update -y -q && \ + apt-get install -y -q \ + build-essential \ + ccache \ + cmake \ + curl \ + gdb \ + git \ + libssl-dev \ + libcurl4-openssl-dev \ + python3-pip \ + python3-venv \ + tzdata \ + tzdata-legacy \ + wget && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists* + +# Installs LLVM toolchain, for Gandiva and testing other compilers +# +# Note that this is installed before the base packages to improve iteration +# while debugging package list with docker build. 
+ARG llvm +RUN latest_system_llvm=14 && \ + if [ ${llvm} -gt ${latest_system_llvm} ]; then \ + apt-get update -y -q && \ + apt-get install -y -q --no-install-recommends \ + apt-transport-https \ + ca-certificates \ + gnupg \ + lsb-release \ + wget && \ + wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add - && \ + code_name=$(lsb_release --codename --short) && \ + if [ ${llvm} -gt 10 ]; then \ + echo "deb https://apt.llvm.org/${code_name}/ llvm-toolchain-${code_name}-${llvm} main" > \ + /etc/apt/sources.list.d/llvm.list; \ + fi; \ + fi && \ + apt-get update -y -q && \ + apt-get install -y -q --no-install-recommends \ + clang-${llvm} \ + llvm-${llvm}-dev && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists* + +COPY ci/scripts/install_minio.sh /arrow/ci/scripts/ +RUN /arrow/ci/scripts/install_minio.sh latest /usr/local + +COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/ +RUN /arrow/ci/scripts/install_gcs_testbench.sh default + +COPY ci/scripts/install_sccache.sh /arrow/ci/scripts/ +RUN /arrow/ci/scripts/install_sccache.sh unknown-linux-musl /usr/local/bin + +ENV ARROW_ACERO=ON \ + ARROW_AZURE=OFF \ + ARROW_BUILD_TESTS=ON \ + ARROW_DATASET=ON \ + ARROW_FLIGHT=ON \ + ARROW_GANDIVA=ON \ + ARROW_GCS=ON \ + ARROW_HDFS=ON \ + ARROW_HOME=/usr/local \ + ARROW_INSTALL_NAME_RPATH=OFF \ + ARROW_ORC=ON \ + ARROW_PARQUET=ON \ + ARROW_S3=ON \ + ARROW_USE_CCACHE=ON \ + ARROW_WITH_BROTLI=ON \ + ARROW_WITH_BZ2=ON \ + ARROW_WITH_LZ4=ON \ + ARROW_WITH_OPENTELEMETRY=OFF \ + ARROW_WITH_SNAPPY=ON \ + ARROW_WITH_ZLIB=ON \ + ARROW_WITH_ZSTD=ON \ + CMAKE_GENERATOR="Unix Makefiles" \ + PARQUET_BUILD_EXAMPLES=ON \ + PARQUET_BUILD_EXECUTABLES=ON \ + PATH=/usr/lib/ccache/:$PATH \ + PYTHON=python3 diff --git a/ci/docker/ubuntu-24.04-cpp.dockerfile b/ci/docker/ubuntu-24.04-cpp.dockerfile index ecfb5e2f5096d..7d0772c33a255 100644 --- a/ci/docker/ubuntu-24.04-cpp.dockerfile +++ b/ci/docker/ubuntu-24.04-cpp.dockerfile @@ -57,6 +57,7 @@ RUN latest_system_llvm=18 && \ clang-${llvm} \ clang-format-${clang_tools} \ clang-tidy-${clang_tools} \ + libclang-rt-${llvm}-dev \ llvm-${llvm}-dev && \ apt-get clean && \ rm -rf /var/lib/apt/lists* diff --git a/ci/scripts/PKGBUILD b/ci/scripts/PKGBUILD index e12099f2b405d..ed68faae950b1 100644 --- a/ci/scripts/PKGBUILD +++ b/ci/scripts/PKGBUILD @@ -18,7 +18,7 @@ _realname=arrow pkgbase=mingw-w64-${_realname} pkgname="${MINGW_PACKAGE_PREFIX}-${_realname}" -pkgver=16.1.0.9000 +pkgver=17.0.0.9000 pkgrel=8000 pkgdesc="Apache Arrow is a cross-language development platform for in-memory data (mingw-w64)" arch=("any") diff --git a/ci/scripts/cpp_test.sh b/ci/scripts/cpp_test.sh index 2c640f2c1fb6a..7912bf23e491c 100755 --- a/ci/scripts/cpp_test.sh +++ b/ci/scripts/cpp_test.sh @@ -80,7 +80,7 @@ case "$(uname)" in ;; esac -if [ "${ARROW_EMSCRIPTEN:-OFF}" = "ON" ]; then +if [ "${ARROW_EMSCRIPTEN:-OFF}" = "ON" ]; then n_jobs=1 # avoid spurious fails on emscripten due to loading too many big executables fi diff --git a/ci/scripts/go_bench.sh b/ci/scripts/go_bench.sh deleted file mode 100755 index 6d5305f9eeff2..0000000000000 --- a/ci/scripts/go_bench.sh +++ /dev/null @@ -1,54 +0,0 @@ -#!/usr/bin/env bash -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# this will output the benchmarks to STDOUT but if `-json` is passed -# as the second argument, it will create a file "bench_stats.json" -# in the directory this is called from containing a json representation - -set -ex - -# simplistic semver comparison -verlte() { - [ "$1" = "`echo -e "$1\n$2" | sort -V | head -n1`" ] -} -verlt() { - [ "$1" = "$2" ] && return 1 || verlte $1 $2 -} - -ver=`go env GOVERSION` - -source_dir=${1}/go - -export PARQUET_TEST_DATA=${1}/cpp/submodules/parquet-testing/data -pushd ${source_dir} - -# lots of benchmarks, they can take a while -# the timeout is for *ALL* benchmarks together, -# not per benchmark -go test -bench=. -benchmem -timeout 40m -run=^$ ./... | tee bench_stat.dat - -popd - -if [[ "$2" = "-json" ]]; then - go install go.bobheadxi.dev/gobenchdata@latest - export PATH=`go env GOPATH`/bin:$PATH - cat ${source_dir}/bench_*.dat | gobenchdata --json bench_stats.json -fi - -rm ${source_dir}/bench_*.dat \ No newline at end of file diff --git a/ci/scripts/go_bench_adapt.py b/ci/scripts/go_bench_adapt.py deleted file mode 100644 index a05e25de8bdd3..0000000000000 --- a/ci/scripts/go_bench_adapt.py +++ /dev/null @@ -1,127 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -import json -import os -import uuid -import logging -from pathlib import Path -from typing import List - -from benchadapt import BenchmarkResult -from benchadapt.adapters import BenchmarkAdapter -from benchadapt.log import log - -log.setLevel(logging.DEBUG) - -ARROW_ROOT = Path(__file__).parent.parent.parent.resolve() -SCRIPTS_PATH = ARROW_ROOT / "ci" / "scripts" - -# `github_commit_info` is meant to communicate GitHub-flavored commit -# information to Conbench. See -# https://github.com/conbench/conbench/blob/cf7931f/benchadapt/python/benchadapt/result.py#L66 -# for a specification. -github_commit_info = {"repository": "https://github.com/apache/arrow"} - -if os.environ.get("CONBENCH_REF") == "main": - # Assume GitHub Actions CI. The environment variable lookups below are - # expected to fail when not running in GitHub Actions. 
- github_commit_info = { - "repository": f'{os.environ["GITHUB_SERVER_URL"]}/{os.environ["GITHUB_REPOSITORY"]}', - "commit": os.environ["GITHUB_SHA"], - "pr_number": None, # implying default branch - } - run_reason = "commit" -else: - # Assume that the environment is not GitHub Actions CI. Error out if that - # assumption seems to be wrong. - assert os.getenv("GITHUB_ACTIONS") is None - - # This is probably a local dev environment, for testing. In this case, it - # does usually not make sense to provide commit information (not a - # controlled CI environment). Explicitly leave out "commit" and "pr_number" to - # reflect that (to not send commit information). - - # Reflect 'local dev' scenario in run_reason. Allow user to (optionally) - # inject a custom piece of information into the run reason here, from - # environment. - run_reason = "localdev" - custom_reason_suffix = os.getenv("CONBENCH_CUSTOM_RUN_REASON") - if custom_reason_suffix is not None: - run_reason += f" {custom_reason_suffix.strip()}" - - -class GoAdapter(BenchmarkAdapter): - result_file = "bench_stats.json" - command = ["bash", SCRIPTS_PATH / "go_bench.sh", ARROW_ROOT, "-json"] - - def __init__(self, *args, **kwargs) -> None: - super().__init__(command=self.command, *args, **kwargs) - - def _transform_results(self) -> List[BenchmarkResult]: - with open(self.result_file, "r") as f: - raw_results = json.load(f) - - run_id = uuid.uuid4().hex - parsed_results = [] - for suite in raw_results[0]["Suites"]: - batch_id = uuid.uuid4().hex - pkg = suite["Pkg"] - - for benchmark in suite["Benchmarks"]: - data = benchmark["Mem"]["MBPerSec"] * 1e6 - time = 1 / benchmark["NsPerOp"] * 1e9 - - name = benchmark["Name"].removeprefix("Benchmark") - ncpu = name[name.rfind("-") + 1 :] - pieces = name[: -(len(ncpu) + 1)].split("/") - - parsed = BenchmarkResult( - run_id=run_id, - batch_id=batch_id, - stats={ - "data": [data], - "unit": "B/s", - "times": [time], - "time_unit": "i/s", - "iterations": benchmark["Runs"], - }, - context={ - "benchmark_language": "Go", - "goos": suite["Goos"], - "goarch": suite["Goarch"], - }, - tags={ - "pkg": pkg, - "num_cpu": ncpu, - "name": pieces[0], - "params": "/".join(pieces[1:]), - }, - run_reason=run_reason, - github=github_commit_info, - ) - parsed.run_name = ( - f"{parsed.run_reason}: {github_commit_info.get('commit')}" - ) - parsed_results.append(parsed) - - return parsed_results - - -if __name__ == "__main__": - go_adapter = GoAdapter(result_fields_override={"info": {}}) - go_adapter() diff --git a/ci/scripts/go_build.sh b/ci/scripts/go_build.sh deleted file mode 100755 index ea77ecf56ac0e..0000000000000 --- a/ci/scripts/go_build.sh +++ /dev/null @@ -1,67 +0,0 @@ -#!/usr/bin/env bash -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -set -ex - -source_dir=${1}/go - -# Need "all=" as per https://github.com/golang/go/issues/42131#issuecomment-713917379 -export GOFLAGS="${GOFLAGS} -gcflags=all=-d=checkptr" - -pushd ${source_dir}/arrow - -if [[ -n "${ARROW_GO_TESTCGO}" ]]; then - if [[ "${MSYSTEM}" = "MINGW64" ]]; then - export PATH=${MINGW_PREFIX}/bin:$PATH - go clean -cache - go clean -testcache - fi - TAGS="-tags assert,test,ccalloc" -fi - -go install $TAGS -v ./... - -popd - -pushd ${source_dir}/parquet - -go install -v ./... - -popd - -: ${ARROW_INTEGRATION_GO:=ON} - -if [ "${ARROW_INTEGRATION_GO}" == "ON" ]; then - pushd ${source_dir}/arrow/internal/cdata_integration - - case "$(uname)" in - Linux) - go_lib="arrow_go_integration.so" - ;; - Darwin) - go_lib="arrow_go_integration.dylib" - ;; - MINGW*) - go_lib="arrow_go_integration.dll" - ;; - esac - go build -buildvcs=false -tags cdata_integration,assert -buildmode=c-shared -o ${go_lib} . - - popd -fi diff --git a/ci/scripts/go_test.sh b/ci/scripts/go_test.sh deleted file mode 100755 index bad2ffe619026..0000000000000 --- a/ci/scripts/go_test.sh +++ /dev/null @@ -1,89 +0,0 @@ -#!/usr/bin/env bash -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -set -ex - -# simplistic semver comparison -verlte() { - [ "$1" = "`echo -e "$1\n$2" | sort -V | head -n1`" ] -} -verlt() { - [ "$1" = "$2" ] && return 1 || verlte $1 $2 -} - -ver=`go env GOVERSION` - -source_dir=${1}/go - -testargs="-race" -if verlte "1.18" "${ver#go}" && [ "$(go env GOOS)" != "darwin" ]; then - # asan not supported on darwin/amd64 - testargs="-asan" -fi - -case "$(uname)" in - MINGW*) - # -asan and -race don't work on windows currently - testargs="" - ;; -esac - -if [[ "$(go env GOHOSTARCH)" = "s390x" ]]; then - testargs="" # -race and -asan not supported on s390x -fi - -# Go static check (skipped in MinGW) -if [[ -z "${MINGW_LINT}" ]]; then - pushd ${source_dir} - "$(go env GOPATH)"/bin/staticcheck ./... - popd -fi - - -pushd ${source_dir}/arrow - -TAGS="assert,test" -if [[ -n "${ARROW_GO_TESTCGO}" ]]; then - if [[ "${MSYSTEM}" = "MINGW64" ]]; then - export PATH=${MINGW_PREFIX}\\bin:${MINGW_PREFIX}\\lib:$PATH - fi - TAGS="${TAGS},ccalloc" -fi - -# the cgo implementation of the c data interface requires the "test" -# tag in order to run its tests so that the testing functions implemented -# in .c files don't get included in non-test builds. - -go test $testargs -tags $TAGS ./... - -# run it again but with the noasm tag -go test $testargs -tags $TAGS,noasm ./... - -popd - -export PARQUET_TEST_DATA=${1}/cpp/submodules/parquet-testing/data -export ARROW_TEST_DATA=${1}/testing/data -pushd ${source_dir}/parquet - -go test $testargs -tags assert ./... - -# run the tests again but with the noasm tag -go test $testargs -tags assert,noasm ./... 
- -popd diff --git a/ci/scripts/install_azurite.sh b/ci/scripts/install_azurite.sh index dda5e99405b7f..b8b1618bed314 100755 --- a/ci/scripts/install_azurite.sh +++ b/ci/scripts/install_azurite.sh @@ -19,20 +19,32 @@ set -e -# Pin azurite to 3.29.0 due to https://github.com/apache/arrow/issues/41505 +node_version="$(node --version)" +echo "node version = ${node_version}" + +case "${node_version}" in + v12*) + # Pin azurite to 3.29.0 due to https://github.com/apache/arrow/issues/41505 + azurite_version=v3.29.0 + ;; + *) + azurite_version=latest + ;; +esac + case "$(uname)" in Darwin) - npm install -g azurite@v3.29.0 + npm install -g azurite@${azurite_version} which azurite ;; MINGW*) choco install nodejs.install - npm install -g azurite@v3.29.0 + npm install -g azurite@${azurite_version} ;; Linux) - npm install -g azurite@v3.29.0 + npm install -g azurite@${azurite_version} which azurite ;; esac -echo "node version = $(node --version)" -echo "azurite version = $(azurite --version)" \ No newline at end of file + +echo "azurite version = $(azurite --version)" diff --git a/ci/scripts/install_gcs_testbench.bat b/ci/scripts/install_gcs_testbench.bat index b03d0c2ad6608..f54f98db7cac8 100644 --- a/ci/scripts/install_gcs_testbench.bat +++ b/ci/scripts/install_gcs_testbench.bat @@ -17,9 +17,18 @@ @echo on -set GCS_TESTBENCH_VERSION="v0.36.0" +set GCS_TESTBENCH_VERSION="v0.40.0" + +set PIPX_FLAGS=--verbose +if NOT "%PIPX_PYTHON%"=="" ( + set PIPX_FLAGS=--python %PIPX_PYTHON% %PIPX_FLAGS% +) + +python -m pip install -U pipx || exit /B 1 @REM Install GCS testbench %GCS_TESTBENCH_VERSION% -python -m pip install ^ +pipx install %PIPX_FLAGS% ^ "https://github.com/googleapis/storage-testbench/archive/%GCS_TESTBENCH_VERSION%.tar.gz" ^ || exit /B 1 + +pipx list --verbose diff --git a/ci/scripts/install_gcs_testbench.sh b/ci/scripts/install_gcs_testbench.sh index 2090290c99322..48a5858a358c9 100755 --- a/ci/scripts/install_gcs_testbench.sh +++ b/ci/scripts/install_gcs_testbench.sh @@ -17,7 +17,7 @@ # specific language governing permissions and limitations # under the License. 
-set -e +set -ex if [ "$#" -ne 1 ]; then echo "Usage: $0 " @@ -34,15 +34,26 @@ case "$(uname -m)" in ;; esac -# On newer pythons install into the system will fail, so override that -export PIP_BREAK_SYSTEM_PACKAGES=1 - version=$1 if [[ "${version}" -eq "default" ]]; then version="v0.39.0" - # Latests versions of Testbench require newer setuptools - ${PYTHON:-python3} -m pip install --upgrade setuptools fi -${PYTHON:-python3} -m pip install \ +# The Python to install pipx with +: ${PIPX_BASE_PYTHON:=$(which python3)} +# The Python to install the GCS testbench with +: ${PIPX_PYTHON:=${PIPX_BASE_PYTHON:-$(which python3)}} + +export PIP_BREAK_SYSTEM_PACKAGES=1 +${PIPX_BASE_PYTHON} -m pip install -U pipx + +pipx_flags=(--verbose --python ${PIPX_PYTHON}) +if [[ $(id -un) == "root" ]]; then + # Install globally as /root/.local/bin is typically not in $PATH + pipx_flags+=(--global) +fi +if [[ -n "${PIPX_PIP_ARGS}" ]]; then + pipx_flags+=(--pip-args "'${PIPX_PIP_ARGS}'") +fi +${PIPX_BASE_PYTHON} -m pipx install ${pipx_flags[@]} \ "https://github.com/googleapis/storage-testbench/archive/${version}.tar.gz" diff --git a/ci/scripts/install_python.sh b/ci/scripts/install_python.sh index 5f962f02b911b..0f8a0804691e7 100755 --- a/ci/scripts/install_python.sh +++ b/ci/scripts/install_python.sh @@ -25,11 +25,12 @@ platforms=([windows]=Windows [linux]=Linux) declare -A versions -versions=([3.8]=3.8.10 - [3.9]=3.9.13 +versions=([3.9]=3.9.13 [3.10]=3.10.11 - [3.11]=3.11.5 - [3.12]=3.12.0) + [3.11]=3.11.9 + [3.12]=3.12.5 + [3.13]=3.13.0 + [3.13t]=3.13.0) if [ "$#" -ne 2 ]; then echo "Usage: $0 " @@ -46,7 +47,14 @@ full_version=${versions[$2]} if [ $platform = "macOS" ]; then echo "Downloading Python installer..." - if [ "$(uname -m)" = "arm64" ] || [ "$version" = "3.10" ] || [ "$version" = "3.11" ] || [ "$version" = "3.12" ]; then + if [ "$version" = "3.13" ] || [ "$version" = "3.13t" ]; + then + fname="python-${full_version}rc2-macos11.pkg" + elif [ "$(uname -m)" = "arm64" ] || \ + [ "$version" = "3.10" ] || \ + [ "$version" = "3.11" ] || \ + [ "$version" = "3.12" ]; + then fname="python-${full_version}-macos11.pkg" else fname="python-${full_version}-macosx10.9.pkg" @@ -54,15 +62,40 @@ if [ $platform = "macOS" ]; then wget "https://www.python.org/ftp/python/${full_version}/${fname}" echo "Installing Python..." - installer -pkg $fname -target / + if [[ $2 == "3.13t" ]]; then + # See https://github.com/python/cpython/issues/120098#issuecomment-2151122033 for more info on this. + cat > ./choicechanges.plist < + + + + + attributeSetting + 1 + choiceAttribute + selected + choiceIdentifier + org.python.Python.PythonTFramework-3.13 + + + +EOF + installer -pkg $fname -applyChoiceChangesXML ./choicechanges.plist -target / + rm ./choicechanges.plist + else + installer -pkg $fname -target / + fi rm $fname - echo "Installing Pip..." python="/Library/Frameworks/Python.framework/Versions/${version}/bin/python${version}" - pip="${python} -m pip" + if [[ $2 == "3.13t" ]]; then + python="/Library/Frameworks/PythonT.framework/Versions/3.13/bin/python3.13t" + fi + echo "Installing Pip..." $python -m ensurepip - $pip install -U pip setuptools + $python -m pip install -U pip setuptools else echo "Unsupported platform: $platform" + exit 1 fi diff --git a/ci/scripts/integration_arrow.sh b/ci/scripts/integration_arrow.sh index 2eb58e8dc75ec..8d0a343ebb443 100755 --- a/ci/scripts/integration_arrow.sh +++ b/ci/scripts/integration_arrow.sh @@ -17,7 +17,7 @@ # specific language governing permissions and limitations # under the License. 
-set -ex +set -e arrow_dir=${1} build_dir=${2} @@ -26,12 +26,19 @@ gold_dir=$arrow_dir/testing/data/arrow-ipc-stream/integration : ${ARROW_INTEGRATION_CPP:=ON} : ${ARROW_INTEGRATION_CSHARP:=ON} -: ${ARROW_INTEGRATION_GO:=ON} : ${ARROW_INTEGRATION_JAVA:=ON} : ${ARROW_INTEGRATION_JS:=ON} +: ${ARCHERY_INTEGRATION_TARGET_IMPLEMENTATIONS:=cpp,csharp,java,js} +export ARCHERY_INTEGRATION_TARGET_IMPLEMENTATIONS + +. ${arrow_dir}/ci/scripts/util_log.sh + +github_actions_group_begin "Integration: Prepare: Archery" pip install -e $arrow_dir/dev/archery[integration] +github_actions_group_end +github_actions_group_begin "Integration: Prepare: Dependencies" # For C Data Interface testing if [ "${ARROW_INTEGRATION_CSHARP}" == "ON" ]; then pip install pythonnet @@ -39,6 +46,7 @@ fi if [ "${ARROW_INTEGRATION_JAVA}" == "ON" ]; then pip install jpype1 fi +github_actions_group_end export ARROW_BUILD_ROOT=${build_dir} @@ -58,7 +66,6 @@ time archery integration \ --run-flight \ --with-cpp=$([ "$ARROW_INTEGRATION_CPP" == "ON" ] && echo "1" || echo "0") \ --with-csharp=$([ "$ARROW_INTEGRATION_CSHARP" == "ON" ] && echo "1" || echo "0") \ - --with-go=$([ "$ARROW_INTEGRATION_GO" == "ON" ] && echo "1" || echo "0") \ --with-java=$([ "$ARROW_INTEGRATION_JAVA" == "ON" ] && echo "1" || echo "0") \ --with-js=$([ "$ARROW_INTEGRATION_JS" == "ON" ] && echo "1" || echo "0") \ --gold-dirs=$gold_dir/0.14.1 \ diff --git a/ci/scripts/integration_arrow_build.sh b/ci/scripts/integration_arrow_build.sh index 9b54049a2b803..4dfcf8768c71f 100755 --- a/ci/scripts/integration_arrow_build.sh +++ b/ci/scripts/integration_arrow_build.sh @@ -17,33 +17,45 @@ # specific language governing permissions and limitations # under the License. -set -ex +set -e arrow_dir=${1} build_dir=${2} : ${ARROW_INTEGRATION_CPP:=ON} : ${ARROW_INTEGRATION_CSHARP:=ON} -: ${ARROW_INTEGRATION_GO:=ON} : ${ARROW_INTEGRATION_JAVA:=ON} : ${ARROW_INTEGRATION_JS:=ON} +. 
${arrow_dir}/ci/scripts/util_log.sh + +github_actions_group_begin "Integration: Build: Rust" ${arrow_dir}/ci/scripts/rust_build.sh ${arrow_dir} ${build_dir} +github_actions_group_end +github_actions_group_begin "Integration: Build: nanoarrow" ${arrow_dir}/ci/scripts/nanoarrow_build.sh ${arrow_dir} ${build_dir} +github_actions_group_end + +github_actions_group_begin "Integration: Build: Go" +if [ "${ARCHERY_INTEGRATION_WITH_GO}" -gt "0" ]; then + ${arrow_dir}/go/ci/scripts/build.sh ${arrow_dir}/go +fi +github_actions_group_end +github_actions_group_begin "Integration: Build: C++" if [ "${ARROW_INTEGRATION_CPP}" == "ON" ]; then ${arrow_dir}/ci/scripts/cpp_build.sh ${arrow_dir} ${build_dir} fi +github_actions_group_end +github_actions_group_begin "Integration: Build: C#" if [ "${ARROW_INTEGRATION_CSHARP}" == "ON" ]; then ${arrow_dir}/ci/scripts/csharp_build.sh ${arrow_dir} ${build_dir} fi +github_actions_group_end -if [ "${ARROW_INTEGRATION_GO}" == "ON" ]; then - ${arrow_dir}/ci/scripts/go_build.sh ${arrow_dir} ${build_dir} -fi - +github_actions_group_begin "Integration: Build: Java" if [ "${ARROW_INTEGRATION_JAVA}" == "ON" ]; then export ARROW_JAVA_CDATA="ON" export JAVA_JNI_CMAKE_ARGS="-DARROW_JAVA_JNI_ENABLE_DEFAULT=OFF -DARROW_JAVA_JNI_ENABLE_C=ON" @@ -51,7 +63,10 @@ if [ "${ARROW_INTEGRATION_JAVA}" == "ON" ]; then ${arrow_dir}/ci/scripts/java_jni_build.sh ${arrow_dir} ${ARROW_HOME} ${build_dir} /tmp/dist/java ${arrow_dir}/ci/scripts/java_build.sh ${arrow_dir} ${build_dir} /tmp/dist/java fi +github_actions_group_end +github_actions_group_begin "Integration: Build: JavaScript" if [ "${ARROW_INTEGRATION_JS}" == "ON" ]; then ${arrow_dir}/ci/scripts/js_build.sh ${arrow_dir} ${build_dir} fi +github_actions_group_end diff --git a/ci/scripts/java_build.sh b/ci/scripts/java_build.sh index 0fa1edab429c0..212ec6eb11476 100755 --- a/ci/scripts/java_build.sh +++ b/ci/scripts/java_build.sh @@ -72,9 +72,6 @@ if [ $ARROW_JAVA_SKIP_GIT_PLUGIN ]; then mvn="${mvn} -Dmaven.gitcommitid.skip=true" fi -# Use `2 * ncores` threads -mvn="${mvn} -T 2C" - # https://github.com/apache/arrow/issues/41429 # TODO: We want to out-of-source build. This is a workaround. 
We copy # all needed files to the build directory from the source directory @@ -98,10 +95,12 @@ if [ "${ARROW_JAVA_JNI}" = "ON" ]; then mvn="${mvn} -Darrow.cpp.build.dir=${java_jni_dist_dir} -Parrow-jni" fi -${mvn} clean install +# Use `2 * ncores` threads +${mvn} -T 2C clean install if [ "${BUILD_DOCS_JAVA}" == "ON" ]; then # HTTP pooling is turned of to avoid download issues https://issues.apache.org/jira/browse/ARROW-11633 + # GH-43378: Maven site plugins not compatible with multithreading mkdir -p ${build_dir}/docs/java/reference ${mvn} -Dcheckstyle.skip=true -Dhttp.keepAlive=false -Dmaven.wagon.http.pool=false clean install site rsync -a target/site/apidocs/ ${build_dir}/docs/java/reference diff --git a/ci/scripts/java_test.sh b/ci/scripts/java_test.sh index dd483ff254197..5efda4318f15a 100755 --- a/ci/scripts/java_test.sh +++ b/ci/scripts/java_test.sh @@ -38,14 +38,10 @@ pushd ${source_dir} ${mvn} clean test projects=() -if [ "${ARROW_DATASET}" = "ON" ]; then - projects+=(gandiva) -fi -if [ "${ARROW_GANDIVA}" = "ON" ]; then - projects+=(gandiva) -fi -if [ "${ARROW_ORC}" = "ON" ]; then +if [ "${ARROW_JAVA_JNI}" = "ON" ]; then projects+=(adapter/orc) + projects+=(dataset) + projects+=(gandiva) fi if [ "${#projects[@]}" -gt 0 ]; then ${mvn} clean test \ diff --git a/ci/scripts/python_wheel_macos_build.sh b/ci/scripts/python_wheel_macos_build.sh index 3ed9d5d8dd12f..d2c392e6b9db3 100755 --- a/ci/scripts/python_wheel_macos_build.sh +++ b/ci/scripts/python_wheel_macos_build.sh @@ -34,7 +34,7 @@ rm -rf ${source_dir}/python/pyarrow/*.so.* echo "=== (${PYTHON_VERSION}) Set SDK, C++ and Wheel flags ===" export _PYTHON_HOST_PLATFORM="macosx-${MACOSX_DEPLOYMENT_TARGET}-${arch}" -export MACOSX_DEPLOYMENT_TARGET=${MACOSX_DEPLOYMENT_TARGET:-10.15} +export MACOSX_DEPLOYMENT_TARGET=${MACOSX_DEPLOYMENT_TARGET:-12.0} export SDKROOT=${SDKROOT:-$(xcrun --sdk macosx --show-sdk-path)} if [ $arch = "arm64" ]; then @@ -48,13 +48,11 @@ fi echo "=== (${PYTHON_VERSION}) Install Python build dependencies ===" export PIP_SITE_PACKAGES=$(python -c 'import site; print(site.getsitepackages()[0])') -export PIP_TARGET_PLATFORM="macosx_${MACOSX_DEPLOYMENT_TARGET//./_}_${arch}" pip install \ --upgrade \ --only-binary=:all: \ --target $PIP_SITE_PACKAGES \ - --platform $PIP_TARGET_PLATFORM \ -r ${source_dir}/python/requirements-wheel-build.txt pip install "delocate>=0.10.3" @@ -152,7 +150,6 @@ echo "=== (${PYTHON_VERSION}) Building wheel ===" export PYARROW_BUILD_TYPE=${CMAKE_BUILD_TYPE} export PYARROW_BUNDLE_ARROW_CPP=1 export PYARROW_CMAKE_GENERATOR=${CMAKE_GENERATOR} -export PYARROW_INSTALL_TESTS=1 export PYARROW_WITH_ACERO=${ARROW_ACERO} export PYARROW_WITH_AZURE=${ARROW_AZURE} export PYARROW_WITH_DATASET=${ARROW_DATASET} diff --git a/ci/scripts/python_wheel_manylinux_build.sh b/ci/scripts/python_wheel_manylinux_build.sh index aa86494a9d47d..885019ff3049f 100755 --- a/ci/scripts/python_wheel_manylinux_build.sh +++ b/ci/scripts/python_wheel_manylinux_build.sh @@ -140,7 +140,6 @@ echo "=== (${PYTHON_VERSION}) Building wheel ===" export PYARROW_BUILD_TYPE=${CMAKE_BUILD_TYPE} export PYARROW_BUNDLE_ARROW_CPP=1 export PYARROW_CMAKE_GENERATOR=${CMAKE_GENERATOR} -export PYARROW_INSTALL_TESTS=1 export PYARROW_WITH_ACERO=${ARROW_ACERO} export PYARROW_WITH_AZURE=${ARROW_AZURE} export PYARROW_WITH_DATASET=${ARROW_DATASET} @@ -181,5 +180,5 @@ popd rm -rf dist/temp-fix-wheel echo "=== (${PYTHON_VERSION}) Tag the wheel with manylinux${MANYLINUX_VERSION} ===" -auditwheel repair -L . 
dist/pyarrow-*.whl -w repaired_wheels +auditwheel repair dist/pyarrow-*.whl -w repaired_wheels popd diff --git a/ci/scripts/python_wheel_unix_test.sh b/ci/scripts/python_wheel_unix_test.sh index a25e5c51bddbc..1487581eaef51 100755 --- a/ci/scripts/python_wheel_unix_test.sh +++ b/ci/scripts/python_wheel_unix_test.sh @@ -34,6 +34,7 @@ source_dir=${1} : ${ARROW_S3:=ON} : ${ARROW_SUBSTRAIT:=ON} : ${CHECK_IMPORTS:=ON} +: ${CHECK_WHEEL_CONTENT:=ON} : ${CHECK_UNITTESTS:=ON} : ${INSTALL_PYARROW:=ON} @@ -54,11 +55,11 @@ export PYARROW_TEST_S3=${ARROW_S3} export PYARROW_TEST_TENSORFLOW=ON export ARROW_TEST_DATA=${source_dir}/testing/data -export PARQUET_TEST_DATA=${source_dir}/submodules/parquet-testing/data +export PARQUET_TEST_DATA=${source_dir}/cpp/submodules/parquet-testing/data if [ "${INSTALL_PYARROW}" == "ON" ]; then # Install the built wheels - pip install ${source_dir}/python/repaired_wheels/*.whl + python -m pip install ${source_dir}/python/repaired_wheels/*.whl fi if [ "${CHECK_IMPORTS}" == "ON" ]; then @@ -87,9 +88,14 @@ import pyarrow.parquet fi fi +if [ "${CHECK_WHEEL_CONTENT}" == "ON" ]; then + python ${source_dir}/ci/scripts/python_wheel_validate_contents.py \ + --path ${source_dir}/python/repaired_wheels +fi + if [ "${CHECK_UNITTESTS}" == "ON" ]; then # Install testing dependencies - pip install -U -r ${source_dir}/python/requirements-wheel-test.txt + python -m pip install -U -r ${source_dir}/python/requirements-wheel-test.txt # Execute unittest, test dependencies must be installed python -c 'import pyarrow; pyarrow.create_library_symlinks()' diff --git a/ci/scripts/python_wheel_validate_contents.py b/ci/scripts/python_wheel_validate_contents.py new file mode 100644 index 0000000000000..22b3a890f036b --- /dev/null +++ b/ci/scripts/python_wheel_validate_contents.py @@ -0,0 +1,48 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +import argparse +from pathlib import Path +import re +import zipfile + + +def validate_wheel(path): + p = Path(path) + wheels = list(p.glob('*.whl')) + error_msg = f"{len(wheels)} wheels found but only 1 expected ({wheels})" + assert len(wheels) == 1, error_msg + f = zipfile.ZipFile(wheels[0]) + outliers = [ + info.filename for info in f.filelist if not re.match( + r'(pyarrow/|pyarrow-[-.\w\d]+\.dist-info/)', info.filename + ) + ] + assert not outliers, f"Unexpected contents in wheel: {sorted(outliers)}" + print(f"The wheel: {wheels[0]} seems valid.") + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("--path", type=str, required=True, + help="Directory where wheel is located") + args = parser.parse_args() + validate_wheel(args.path) + + +if __name__ == '__main__': + main() diff --git a/ci/scripts/python_wheel_windows_build.bat b/ci/scripts/python_wheel_windows_build.bat index 54f02ec6f6ed0..1f1d5dca721d9 100644 --- a/ci/scripts/python_wheel_windows_build.bat +++ b/ci/scripts/python_wheel_windows_build.bat @@ -106,7 +106,6 @@ echo "=== (%PYTHON_VERSION%) Building wheel ===" set PYARROW_BUILD_TYPE=%CMAKE_BUILD_TYPE% set PYARROW_BUNDLE_ARROW_CPP=ON set PYARROW_CMAKE_GENERATOR=%CMAKE_GENERATOR% -set PYARROW_INSTALL_TESTS=ON set PYARROW_WITH_ACERO=%ARROW_ACERO% set PYARROW_WITH_DATASET=%ARROW_DATASET% set PYARROW_WITH_FLIGHT=%ARROW_FLIGHT% diff --git a/ci/scripts/python_wheel_windows_test.bat b/ci/scripts/python_wheel_windows_test.bat index a928c3571d0cb..ae5b7e36ad7ab 100755 --- a/ci/scripts/python_wheel_windows_test.bat +++ b/ci/scripts/python_wheel_windows_test.bat @@ -28,37 +28,44 @@ set PYARROW_TEST_ORC=ON set PYARROW_TEST_PARQUET=ON set PYARROW_TEST_PARQUET_ENCRYPTION=ON set PYARROW_TEST_SUBSTRAIT=ON -set PYARROW_TEST_S3=OFF +set PYARROW_TEST_S3=ON set PYARROW_TEST_TENSORFLOW=ON @REM Enable again once https://github.com/scipy/oldest-supported-numpy/pull/27 gets merged @REM set PYARROW_TEST_PANDAS=ON set ARROW_TEST_DATA=C:\arrow\testing\data -set PARQUET_TEST_DATA=C:\arrow\submodules\parquet-testing\data +set PARQUET_TEST_DATA=C:\arrow\cpp\submodules\parquet-testing\data -@REM Install testing dependencies -pip install -r C:\arrow\python\requirements-wheel-test.txt || exit /B 1 +@REM List installed Pythons +py -0p + +set PYTHON_CMD=py -%PYTHON% -@REM Install GCS testbench -call "C:\arrow\ci\scripts\install_gcs_testbench.bat" +%PYTHON_CMD% -m pip install -U pip setuptools || exit /B 1 + +@REM Install testing dependencies +%PYTHON_CMD% -m pip install -r C:\arrow\python\requirements-wheel-test.txt || exit /B 1 @REM Install the built wheels -python -m pip install --no-index --find-links=C:\arrow\python\dist\ pyarrow || exit /B 1 +%PYTHON_CMD% -m pip install --no-index --find-links=C:\arrow\python\dist\ pyarrow || exit /B 1 @REM Test that the modules are importable -python -c "import pyarrow" || exit /B 1 -python -c "import pyarrow._gcsfs" || exit /B 1 -python -c "import pyarrow._hdfs" || exit /B 1 -python -c "import pyarrow._s3fs" || exit /B 1 -python -c "import pyarrow.csv" || exit /B 1 -python -c "import pyarrow.dataset" || exit /B 1 -python -c "import pyarrow.flight" || exit /B 1 -python -c "import pyarrow.fs" || exit /B 1 -python -c "import pyarrow.json" || exit /B 1 -python -c "import pyarrow.orc" || exit /B 1 -python -c "import pyarrow.parquet" || exit /B 1 -python -c "import pyarrow.substrait" || exit /B 1 +%PYTHON_CMD% -c "import pyarrow" || exit /B 1 +%PYTHON_CMD% -c "import pyarrow._gcsfs" || exit /B 1 +%PYTHON_CMD% -c "import pyarrow._hdfs" || exit /B 1 
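Both the Unix and Windows wheel test scripts now run this validator against the directory holding the built wheel; on Unix the call is guarded by the new CHECK_WHEEL_CONTENT switch. A sketch of a standalone invocation, with an illustrative checkout path:

#!/usr/bin/env bash
# Run the wheel-content check against a directory containing exactly one wheel.
set -e

source_dir=/path/to/arrow                      # illustrative checkout location
wheel_dir=${source_dir}/python/repaired_wheels

# The validator asserts that the wheel contains only pyarrow/ and
# pyarrow-*.dist-info/ entries and exits non-zero otherwise.
python ${source_dir}/ci/scripts/python_wheel_validate_contents.py \
  --path ${wheel_dir}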
+%PYTHON_CMD% -c "import pyarrow._s3fs" || exit /B 1 +%PYTHON_CMD% -c "import pyarrow.csv" || exit /B 1 +%PYTHON_CMD% -c "import pyarrow.dataset" || exit /B 1 +%PYTHON_CMD% -c "import pyarrow.flight" || exit /B 1 +%PYTHON_CMD% -c "import pyarrow.fs" || exit /B 1 +%PYTHON_CMD% -c "import pyarrow.json" || exit /B 1 +%PYTHON_CMD% -c "import pyarrow.orc" || exit /B 1 +%PYTHON_CMD% -c "import pyarrow.parquet" || exit /B 1 +%PYTHON_CMD% -c "import pyarrow.substrait" || exit /B 1 + +@REM Validate wheel contents +%PYTHON_CMD% C:\arrow\ci\scripts\python_wheel_validate_contents.py --path C:\arrow\python\dist || exit /B 1 @rem Download IANA Timezone Database for ORC C++ curl https://cygwin.osuosl.org/noarch/release/tzdata/tzdata-2024a-1.tar.xz --output tzdata.tar.xz || exit /B @@ -67,4 +74,4 @@ arc unarchive tzdata.tar.xz %USERPROFILE%\Downloads\test\tzdata set TZDIR=%USERPROFILE%\Downloads\test\tzdata\usr\share\zoneinfo @REM Execute unittest -pytest -r s --pyargs pyarrow || exit /B 1 +%PYTHON_CMD% -m pytest -r s --pyargs pyarrow || exit /B 1 diff --git a/ci/scripts/r_install_system_dependencies.sh b/ci/scripts/r_install_system_dependencies.sh index 7ddc2604f661a..ae2a04656c528 100755 --- a/ci/scripts/r_install_system_dependencies.sh +++ b/ci/scripts/r_install_system_dependencies.sh @@ -54,7 +54,7 @@ if [ "$ARROW_S3" == "ON" ] || [ "$ARROW_GCS" == "ON" ] || [ "$ARROW_R_DEV" == "T case "$PACKAGE_MANAGER" in zypper) # python3 is Python 3.6 on OpenSUSE 15.3. - # PyArrow supports Python 3.8 or later. + # PyArrow supports Python 3.9 or later. $PACKAGE_MANAGER install -y python39-pip ln -s /usr/bin/python3.9 /usr/local/bin/python ln -s /usr/bin/pip3.9 /usr/local/bin/pip diff --git a/ci/scripts/r_test.sh b/ci/scripts/r_test.sh index fe9d18edb8cbb..d5fd78914755e 100755 --- a/ci/scripts/r_test.sh +++ b/ci/scripts/r_test.sh @@ -26,6 +26,10 @@ pushd ${source_dir} printenv +if [ -n "${ARROW_PYTHON_VENV:-}" ]; then + . "${ARROW_PYTHON_VENV}/bin/activate" +fi + # Run the nixlibs.R test suite, which is not included in the installed package ${R_BIN} -e 'setwd("tools"); testthat::test_dir(".", stop_on_warning = TRUE)' diff --git a/ci/scripts/util_enable_core_dumps.sh b/ci/scripts/util_enable_core_dumps.sh new file mode 100644 index 0000000000000..09f8d2d727099 --- /dev/null +++ b/ci/scripts/util_enable_core_dumps.sh @@ -0,0 +1,33 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# NOTE: this script is not marked executable as it should be source'd +# for `ulimit` to take effect. + +set -e + +platform=$(uname) + +if [ "${platform}" = "Linux" ]; then + # We need to override `core_pattern` because + # 1. the original setting may reference apport, which is not available under + # most Docker containers; + # 2. we want to write the core file in a well-known directory. 
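The r_test.sh change above activates a Python virtual environment whenever ARROW_PYTHON_VENV is set, presumably so the Python picked up during the R tests is a controlled one rather than whatever is first on PATH. A hypothetical caller could hand it such an environment as sketched below; the venv location is illustrative and not taken from this diff.

#!/usr/bin/env bash
# Sketch: provide a pre-built virtualenv to ci/scripts/r_test.sh via ARROW_PYTHON_VENV.
set -e

venv_dir=/tmp/arrow-r-python-venv    # illustrative location

python3 -m venv "${venv_dir}"
"${venv_dir}/bin/pip" install --upgrade pip

# r_test.sh sources "${ARROW_PYTHON_VENV}/bin/activate" when this is set.
export ARROW_PYTHON_VENV="${venv_dir}"
# ...then run ci/scripts/r_test.sh with its usual arguments; it activates this venv first.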
+ sudo sysctl -w kernel.core_pattern="/tmp/core.%e.%p" +fi + +ulimit -c unlimited diff --git a/ci/scripts/go_tinygo_example.sh b/ci/scripts/util_log.sh old mode 100755 new mode 100644 similarity index 84% rename from ci/scripts/go_tinygo_example.sh rename to ci/scripts/util_log.sh index 7bde56226db7b..b34c44059adb2 --- a/ci/scripts/go_tinygo_example.sh +++ b/ci/scripts/util_log.sh @@ -1,5 +1,3 @@ -#!/usr/bin/env bash -# # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information @@ -17,11 +15,12 @@ # specific language governing permissions and limitations # under the License. -set -ex - -cd ~ -pushd /src -tinygo build -tags noasm -o ~/example_tinygo arrow/_examples/helloworld/main.go -popd +github_actions_group_begin() { + echo "::group::$1" + set -x +} -./example_tinygo +github_actions_group_end() { + set +x + echo "::endgroup::" +} diff --git a/ci/vcpkg/arm64-osx-static-debug.cmake b/ci/vcpkg/arm64-osx-static-debug.cmake index f511819a2edd9..32ae7bc433489 100644 --- a/ci/vcpkg/arm64-osx-static-debug.cmake +++ b/ci/vcpkg/arm64-osx-static-debug.cmake @@ -21,6 +21,6 @@ set(VCPKG_LIBRARY_LINKAGE static) set(VCPKG_CMAKE_SYSTEM_NAME Darwin) set(VCPKG_OSX_ARCHITECTURES arm64) -set(VCPKG_OSX_DEPLOYMENT_TARGET "11.0") +set(VCPKG_OSX_DEPLOYMENT_TARGET "12.0") set(VCPKG_BUILD_TYPE debug) diff --git a/ci/vcpkg/arm64-osx-static-release.cmake b/ci/vcpkg/arm64-osx-static-release.cmake index 43d65efb2651b..dde46cd763afe 100644 --- a/ci/vcpkg/arm64-osx-static-release.cmake +++ b/ci/vcpkg/arm64-osx-static-release.cmake @@ -21,6 +21,6 @@ set(VCPKG_LIBRARY_LINKAGE static) set(VCPKG_CMAKE_SYSTEM_NAME Darwin) set(VCPKG_OSX_ARCHITECTURES arm64) -set(VCPKG_OSX_DEPLOYMENT_TARGET "11.0") +set(VCPKG_OSX_DEPLOYMENT_TARGET "12.0") set(VCPKG_BUILD_TYPE release) diff --git a/ci/vcpkg/universal2-osx-static-debug.cmake b/ci/vcpkg/universal2-osx-static-debug.cmake index 8abc1ebf838f1..d3ef0d67eb719 100644 --- a/ci/vcpkg/universal2-osx-static-debug.cmake +++ b/ci/vcpkg/universal2-osx-static-debug.cmake @@ -21,6 +21,6 @@ set(VCPKG_LIBRARY_LINKAGE static) set(VCPKG_CMAKE_SYSTEM_NAME Darwin) set(VCPKG_OSX_ARCHITECTURES "x86_64;arm64") -set(VCPKG_OSX_DEPLOYMENT_TARGET "10.15") +set(VCPKG_OSX_DEPLOYMENT_TARGET "12.0") set(VCPKG_BUILD_TYPE debug) diff --git a/ci/vcpkg/universal2-osx-static-release.cmake b/ci/vcpkg/universal2-osx-static-release.cmake index 2eb36c15175b2..3018aa93e5fbb 100644 --- a/ci/vcpkg/universal2-osx-static-release.cmake +++ b/ci/vcpkg/universal2-osx-static-release.cmake @@ -21,6 +21,6 @@ set(VCPKG_LIBRARY_LINKAGE static) set(VCPKG_CMAKE_SYSTEM_NAME Darwin) set(VCPKG_OSX_ARCHITECTURES "x86_64;arm64") -set(VCPKG_OSX_DEPLOYMENT_TARGET "10.15") +set(VCPKG_OSX_DEPLOYMENT_TARGET "12.0") set(VCPKG_BUILD_TYPE release) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 8247043b8bf84..423744c388471 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -79,12 +79,12 @@ if(POLICY CMP0170) cmake_policy(SET CMP0170 NEW) endif() -set(ARROW_VERSION "17.0.0-SNAPSHOT") +set(ARROW_VERSION "18.0.0-SNAPSHOT") string(REGEX MATCH "^[0-9]+\\.[0-9]+\\.[0-9]+" ARROW_BASE_VERSION "${ARROW_VERSION}") # if no build type is specified, default to release builds -if(NOT DEFINED CMAKE_BUILD_TYPE) +if(NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build.") @@ -301,7 +301,8 @@ add_custom_target(lint --cpplint_binary ${CPPLINT_BIN} ${COMMON_LINT_OPTIONS} - 
${ARROW_LINT_QUIET}) + ${ARROW_LINT_QUIET} + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/..) # # "make format" and "make check-format" targets diff --git a/cpp/build-support/fuzzing/generate_corpuses.sh b/cpp/build-support/fuzzing/generate_corpuses.sh index e3f00e64782c1..ffd5c54e4436a 100755 --- a/cpp/build-support/fuzzing/generate_corpuses.sh +++ b/cpp/build-support/fuzzing/generate_corpuses.sh @@ -56,4 +56,5 @@ rm -rf ${CORPUS_DIR} ${OUT}/parquet-arrow-generate-fuzz-corpus ${CORPUS_DIR} # Add Parquet testing examples cp ${ARROW_CPP}/submodules/parquet-testing/data/*.parquet ${CORPUS_DIR} +cp ${ARROW_CPP}/submodules/parquet-testing/bad_data/*.parquet ${CORPUS_DIR} ${ARROW_CPP}/build-support/fuzzing/pack_corpus.py ${CORPUS_DIR} ${OUT}/parquet-arrow-fuzz_seed_corpus.zip diff --git a/cpp/build-support/lint_cpp_cli.py b/cpp/build-support/lint_cpp_cli.py index a0eb8f0efe6d5..47abd53fe925d 100755 --- a/cpp/build-support/lint_cpp_cli.py +++ b/cpp/build-support/lint_cpp_cli.py @@ -31,6 +31,7 @@ _NULLPTR_REGEX = re.compile(r'.*\bnullptr\b.*') _RETURN_NOT_OK_REGEX = re.compile(r'.*\sRETURN_NOT_OK.*') _ASSIGN_OR_RAISE_REGEX = re.compile(r'.*\sASSIGN_OR_RAISE.*') +_DCHECK_REGEX = re.compile(r'.*\sDCHECK.*') def _paths(paths): @@ -54,14 +55,12 @@ def lint_file(path): (lambda x: re.match(_RETURN_NOT_OK_REGEX, x), 'Use ARROW_RETURN_NOT_OK in header files', _paths('''\ arrow/status.h - test - arrow/util/hash.h arrow/python/util''')), (lambda x: re.match(_ASSIGN_OR_RAISE_REGEX, x), - 'Use ARROW_ASSIGN_OR_RAISE in header files', _paths('''\ - arrow/result_internal.h - test - ''')) + 'Use ARROW_ASSIGN_OR_RAISE in header files', []), + (lambda x: re.match(_DCHECK_REGEX, x), + 'Use ARROW_DCHECK in header files', _paths('''\ + arrow/util/logging.h''')) ] diff --git a/cpp/build-support/run-test.sh b/cpp/build-support/run-test.sh index 8e42438a23c1c..55e3fe0980749 100755 --- a/cpp/build-support/run-test.sh +++ b/cpp/build-support/run-test.sh @@ -121,12 +121,15 @@ function print_coredumps() { # patterns must be set with prefix `core.{test-executable}*`: # # In case of macOS: - # sudo sysctl -w kern.corefile=core.%N.%P + # sudo sysctl -w kern.corefile=/tmp/core.%N.%P # On Linux: - # sudo sysctl -w kernel.core_pattern=core.%e.%p + # sudo sysctl -w kernel.core_pattern=/tmp/core.%e.%p # # and the ulimit must be increased: # ulimit -c unlimited + # + # If the tests are run in a Docker container, the instructions are slightly + # different: see the 'Coredumps' comment section in `docker-compose.yml`. # filename is truncated to the first 15 characters in case of linux, so limit # the pattern for the first 15 characters @@ -134,19 +137,21 @@ function print_coredumps() { FILENAME=$(echo ${FILENAME} | cut -c-15) PATTERN="^core\.${FILENAME}" - COREFILES=$(ls | grep $PATTERN) + COREFILES=$(ls /tmp | grep $PATTERN) if [ -n "$COREFILES" ]; then - echo "Found core dump, printing backtrace:" - for COREFILE in $COREFILES; do + COREPATH="/tmp/${COREFILE}" + echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" 
+ echo "Running '${TEST_EXECUTABLE}' produced core dump at '${COREPATH}', printing backtrace:" # Print backtrace if [ "$(uname)" == "Darwin" ]; then - lldb -c "${COREFILE}" --batch --one-line "thread backtrace all -e true" + lldb -c "${COREPATH}" --batch --one-line "thread backtrace all -e true" else - gdb -c "${COREFILE}" $TEST_EXECUTABLE -ex "thread apply all bt" -ex "set pagination 0" -batch + gdb -c "${COREPATH}" $TEST_EXECUTABLE -ex "thread apply all bt" -ex "set pagination 0" -batch fi - # Remove the coredump, regenerate it via running the test case directly - rm "${COREFILE}" + echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" + # Remove the coredump, it can be regenerated via running the test case directly + rm "${COREPATH}" done fi } diff --git a/cpp/build-support/run_cpplint.py b/cpp/build-support/run_cpplint.py index 76c0fe0aefaca..a81acf2eb2ff9 100755 --- a/cpp/build-support/run_cpplint.py +++ b/cpp/build-support/run_cpplint.py @@ -26,24 +26,6 @@ from functools import partial -# NOTE(wesm): -# -# * readability/casting is disabled as it aggressively warns about functions -# with names like "int32", so "int32(x)", where int32 is a function name, -# warns with -_filters = ''' --whitespace/comments --readability/casting --readability/todo --readability/alt_tokens --build/header_guard --build/c++11 --build/include_what_you_use --runtime/references --build/include_order -'''.split() - - def _get_chunk_key(filenames): # lists are not hashable so key on the first filename in a chunk return filenames[0] @@ -87,8 +69,6 @@ def _check_some_files(completed_processes, filenames): cmd = [ arguments.cpplint_binary, '--verbose=2', - '--linelength=90', - '--filter=' + ','.join(_filters) ] if (arguments.cpplint_binary.endswith('.py') and platform.system() == 'Windows'): diff --git a/cpp/cmake_modules/BuildUtils.cmake b/cpp/cmake_modules/BuildUtils.cmake index e7523add27223..692efa78376f4 100644 --- a/cpp/cmake_modules/BuildUtils.cmake +++ b/cpp/cmake_modules/BuildUtils.cmake @@ -721,6 +721,11 @@ function(ADD_TEST_CASE REL_TEST_NAME) "${EXECUTABLE_OUTPUT_PATH};$ENV{CONDA_PREFIX}/lib") endif() + # Ensure using bundled GoogleTest when we use bundled GoogleTest. + # ARROW_GTEST_GTEST_HEADERS is defined only when we use bundled + # GoogleTest. 
+ target_link_libraries(${TEST_NAME} PRIVATE ${ARROW_GTEST_GTEST_HEADERS}) + if(ARG_STATIC_LINK_LIBS) # Customize link libraries target_link_libraries(${TEST_NAME} PRIVATE ${ARG_STATIC_LINK_LIBS}) diff --git a/cpp/cmake_modules/DefineOptions.cmake b/cpp/cmake_modules/DefineOptions.cmake index 41466a1c22404..755887314d110 100644 --- a/cpp/cmake_modules/DefineOptions.cmake +++ b/cpp/cmake_modules/DefineOptions.cmake @@ -303,7 +303,10 @@ takes precedence over ccache if a storage backend is configured" ON) ARROW_IPC) define_option(ARROW_AZURE - "Build Arrow with Azure support (requires the Azure SDK for C++)" OFF) + "Build Arrow with Azure support (requires the Azure SDK for C++)" + OFF + DEPENDS + ARROW_FILESYSTEM) define_option(ARROW_BUILD_UTILITIES "Build Arrow commandline utilities" OFF) @@ -346,9 +349,16 @@ takes precedence over ccache if a storage backend is configured" ON) ARROW_WITH_UTF8PROC) define_option(ARROW_GCS - "Build Arrow with GCS support (requires the GCloud SDK for C++)" OFF) + "Build Arrow with GCS support (requires the GCloud SDK for C++)" + OFF + DEPENDS + ARROW_FILESYSTEM) - define_option(ARROW_HDFS "Build the Arrow HDFS bridge" OFF) + define_option(ARROW_HDFS + "Build the Arrow HDFS bridge" + OFF + DEPENDS + ARROW_FILESYSTEM) define_option(ARROW_IPC "Build the Arrow IPC extensions" ON) @@ -398,7 +408,11 @@ takes precedence over ccache if a storage backend is configured" ON) ARROW_HDFS ARROW_JSON) - define_option(ARROW_S3 "Build Arrow with S3 support (requires the AWS SDK for C++)" OFF) + define_option(ARROW_S3 + "Build Arrow with S3 support (requires the AWS SDK for C++)" + OFF + DEPENDS + ARROW_FILESYSTEM) define_option(ARROW_SKYHOOK "Build the Skyhook libraries" diff --git a/cpp/cmake_modules/FindThriftAlt.cmake b/cpp/cmake_modules/FindThriftAlt.cmake index f3e49021d5738..98a706deb9919 100644 --- a/cpp/cmake_modules/FindThriftAlt.cmake +++ b/cpp/cmake_modules/FindThriftAlt.cmake @@ -191,6 +191,10 @@ if(ThriftAlt_FOUND) # thrift/windows/config.h for Visual C++. set_target_properties(thrift::thrift PROPERTIES INTERFACE_LINK_LIBRARIES "ws2_32") endif() + # Workaround: thrift.pc doesn't have Boost dependency. 
+ if(TARGET Boost::headers) + target_link_libraries(thrift::thrift INTERFACE Boost::headers) + endif() if(Thrift_COMPILER_FOUND) add_executable(thrift::compiler IMPORTED) diff --git a/cpp/cmake_modules/Findlz4Alt.cmake b/cpp/cmake_modules/Findlz4Alt.cmake index 77a22957f7964..91e735107a954 100644 --- a/cpp/cmake_modules/Findlz4Alt.cmake +++ b/cpp/cmake_modules/Findlz4Alt.cmake @@ -29,9 +29,15 @@ endif() find_package(lz4 ${find_package_args}) if(lz4_FOUND) set(lz4Alt_FOUND TRUE) - # Conan uses lz4::lz4 not LZ4::lz4 - if(NOT TARGET LZ4::lz4 AND TARGET lz4::lz4) - add_library(LZ4::lz4 ALIAS lz4::lz4) + if(NOT TARGET LZ4::lz4) + # Conan uses lz4::lz4 not LZ4::lz4 + if(TARGET lz4::lz4) + add_library(LZ4::lz4 ALIAS lz4::lz4) + elseif(ARROW_LZ4_USE_SHARED) + add_library(LZ4::lz4 ALIAS LZ4::lz4_shared) + else() + add_library(LZ4::lz4 ALIAS LZ4::lz4_static) + endif() endif() return() endif() diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 5b89a831ff7fe..db151b4e0f44b 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -259,7 +259,7 @@ macro(resolve_dependency DEPENDENCY_NAME) IS_RUNTIME_DEPENDENCY REQUIRED_VERSION USE_CONFIG) - set(multi_value_args COMPONENTS PC_PACKAGE_NAMES) + set(multi_value_args COMPONENTS OPTIONAL_COMPONENTS PC_PACKAGE_NAMES) cmake_parse_arguments(ARG "${options}" "${one_value_args}" @@ -287,6 +287,9 @@ macro(resolve_dependency DEPENDENCY_NAME) if(ARG_COMPONENTS) list(APPEND FIND_PACKAGE_ARGUMENTS COMPONENTS ${ARG_COMPONENTS}) endif() + if(ARG_OPTIONAL_COMPONENTS) + list(APPEND FIND_PACKAGE_ARGUMENTS OPTIONAL_COMPONENTS ${ARG_OPTIONAL_COMPONENTS}) + endif() if(${DEPENDENCY_NAME}_SOURCE STREQUAL "AUTO") find_package(${FIND_PACKAGE_ARGUMENTS}) set(COMPATIBLE ${${PACKAGE_NAME}_FOUND}) @@ -1289,15 +1292,19 @@ if(ARROW_USE_BOOST) set(Boost_USE_STATIC_LIBS ON) endif() if(ARROW_BOOST_REQUIRE_LIBRARY) - set(ARROW_BOOST_COMPONENTS system filesystem) + set(ARROW_BOOST_COMPONENTS filesystem system) + set(ARROW_BOOST_OPTIONAL_COMPONENTS process) else() set(ARROW_BOOST_COMPONENTS) + set(ARROW_BOOST_OPTIONAL_COMPONENTS) endif() resolve_dependency(Boost REQUIRED_VERSION ${ARROW_BOOST_REQUIRED_VERSION} COMPONENTS ${ARROW_BOOST_COMPONENTS} + OPTIONAL_COMPONENTS + ${ARROW_BOOST_OPTIONAL_COMPONENTS} IS_RUNTIME_DEPENDENCY # libarrow.so doesn't depend on libboost*. FALSE) @@ -1316,14 +1323,35 @@ if(ARROW_USE_BOOST) endif() endforeach() - if(WIN32 AND CMAKE_CXX_COMPILER_ID STREQUAL "GNU") - # boost/process/detail/windows/handle_workaround.hpp doesn't work - # without BOOST_USE_WINDOWS_H with MinGW because MinGW doesn't - # provide __kernel_entry without winternl.h. 
- # - # See also: - # https://github.com/boostorg/process/blob/develop/include/boost/process/detail/windows/handle_workaround.hpp - target_compile_definitions(Boost::headers INTERFACE "BOOST_USE_WINDOWS_H=1") + if(TARGET Boost::process) + # Boost >= 1.86 + target_compile_definitions(Boost::process INTERFACE "BOOST_PROCESS_HAVE_V1") + target_compile_definitions(Boost::process INTERFACE "BOOST_PROCESS_HAVE_V2") + else() + # Boost < 1.86 + add_library(Boost::process INTERFACE IMPORTED) + if(TARGET Boost::filesystem) + target_link_libraries(Boost::process INTERFACE Boost::filesystem) + endif() + if(TARGET Boost::system) + target_link_libraries(Boost::process INTERFACE Boost::system) + endif() + if(TARGET Boost::headers) + target_link_libraries(Boost::process INTERFACE Boost::headers) + endif() + if(Boost_VERSION VERSION_GREATER_EQUAL 1.80) + target_compile_definitions(Boost::process INTERFACE "BOOST_PROCESS_HAVE_V2") + # Boost < 1.86 has a bug that + # boost::process::v2::process_environment::on_setup() isn't + # defined. We need to build Boost Process source to define it. + # + # See also: + # https://github.com/boostorg/process/issues/312 + target_compile_definitions(Boost::process INTERFACE "BOOST_PROCESS_NEED_SOURCE") + if(WIN32) + target_link_libraries(Boost::process INTERFACE bcrypt ntdll) + endif() + endif() endif() message(STATUS "Boost include dir: ${Boost_INCLUDE_DIRS}") @@ -1355,15 +1383,23 @@ macro(build_snappy) "-DCMAKE_INSTALL_PREFIX=${SNAPPY_PREFIX}") # Snappy unconditionally enables -Werror when building with clang this can lead # to build failures by way of new compiler warnings. This adds a flag to disable - # Werror to the very end of the invocation to override the snappy internal setting. + # -Werror to the very end of the invocation to override the snappy internal setting. + set(SNAPPY_ADDITIONAL_CXX_FLAGS "") if(CMAKE_CXX_COMPILER_ID MATCHES "Clang") - foreach(CONFIG DEBUG MINSIZEREL RELEASE RELWITHDEBINFO) - list(APPEND - SNAPPY_CMAKE_ARGS - "-DCMAKE_CXX_FLAGS_${UPPERCASE_BUILD_TYPE}=${EP_CXX_FLAGS_${CONFIG}} -Wno-error" - ) - endforeach() + string(APPEND SNAPPY_ADDITIONAL_CXX_FLAGS " -Wno-error") endif() + # Snappy unconditionally disables RTTI, which is incompatible with some other + # build settings (https://github.com/apache/arrow/issues/43688). 
+ if(NOT MSVC) + string(APPEND SNAPPY_ADDITIONAL_CXX_FLAGS " -frtti") + endif() + + foreach(CONFIG DEBUG MINSIZEREL RELEASE RELWITHDEBINFO) + list(APPEND + SNAPPY_CMAKE_ARGS + "-DCMAKE_CXX_FLAGS_${CONFIG}=${EP_CXX_FLAGS_${CONFIG}} ${SNAPPY_ADDITIONAL_CXX_FLAGS}" + ) + endforeach() if(APPLE AND CMAKE_HOST_SYSTEM_VERSION VERSION_LESS 20) # On macOS 10.13 we need to explicitly add to avoid a missing include error @@ -2306,6 +2342,10 @@ function(build_gtest) install(DIRECTORY "${googletest_SOURCE_DIR}/googlemock/include/" "${googletest_SOURCE_DIR}/googletest/include/" DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}") + add_library(arrow::GTest::gtest_headers INTERFACE IMPORTED) + target_include_directories(arrow::GTest::gtest_headers + INTERFACE "${googletest_SOURCE_DIR}/googlemock/include/" + "${googletest_SOURCE_DIR}/googletest/include/") install(TARGETS gmock gmock_main gtest gtest_main EXPORT arrow_testing_targets RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" @@ -2350,12 +2390,14 @@ if(ARROW_TESTING) string(APPEND ARROW_TESTING_PC_LIBS " $") endif() + set(ARROW_GTEST_GTEST_HEADERS) set(ARROW_GTEST_GMOCK GTest::gmock) set(ARROW_GTEST_GTEST GTest::gtest) set(ARROW_GTEST_GTEST_MAIN GTest::gtest_main) else() string(APPEND ARROW_TESTING_PC_CFLAGS " -I\${includedir}/arrow-gtest") string(APPEND ARROW_TESTING_PC_LIBS " -larrow_gtest") + set(ARROW_GTEST_GTEST_HEADERS arrow::GTest::gtest_headers) set(ARROW_GTEST_GMOCK arrow::GTest::gmock) set(ARROW_GTEST_GTEST arrow::GTest::gtest) set(ARROW_GTEST_GTEST_MAIN arrow::GTest::gtest_main) @@ -2882,6 +2924,10 @@ macro(build_absl) set(ABSL_INCLUDE_DIR "${ABSL_PREFIX}/include") set(ABSL_CMAKE_ARGS "${EP_COMMON_CMAKE_ARGS}" -DABSL_RUN_TESTS=OFF "-DCMAKE_INSTALL_PREFIX=${ABSL_PREFIX}") + if(CMAKE_COMPILER_IS_GNUCC AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 13.0) + set(ABSL_CXX_FLAGS "${EP_CXX_FLAGS} -include stdint.h") + list(APPEND ABSL_CMAKE_ARGS "-DCMAKE_CXX_FLAGS=${ABSL_CXX_FLAGS}") + endif() set(ABSL_BUILD_BYPRODUCTS) set(ABSL_LIBRARIES) @@ -4506,9 +4552,12 @@ function(build_orc) OFF CACHE BOOL "" FORCE) get_target_property(LZ4_INCLUDE_DIR LZ4::lz4 INTERFACE_INCLUDE_DIRECTORIES) + if(NOT LZ4_INCLUDE_DIR) + find_path(LZ4_INCLUDE_DIR NAMES lz4.h) + endif() get_filename_component(LZ4_ROOT "${LZ4_INCLUDE_DIR}" DIRECTORY) set(LZ4_HOME - ${LZ4_ROOT} + "${LZ4_ROOT}" CACHE STRING "" FORCE) set(LZ4_LIBRARY LZ4::lz4 @@ -4944,8 +4993,24 @@ macro(build_awssdk) set(AWSSDK_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/awssdk_ep-install") set(AWSSDK_INCLUDE_DIR "${AWSSDK_PREFIX}/include") + # The AWS SDK has a few warnings around shortening lengths + set(AWS_C_FLAGS "${EP_C_FLAGS}") + set(AWS_CXX_FLAGS "${EP_CXX_FLAGS}") + if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID STREQUAL + "Clang") + # Negate warnings that AWS SDK cannot build under + string(APPEND AWS_C_FLAGS " -Wno-error=shorten-64-to-32") + string(APPEND AWS_CXX_FLAGS " -Wno-error=shorten-64-to-32") + endif() + if(NOT MSVC) + string(APPEND AWS_C_FLAGS " -Wno-deprecated") + string(APPEND AWS_CXX_FLAGS " -Wno-deprecated") + endif() + set(AWSSDK_COMMON_CMAKE_ARGS ${EP_COMMON_CMAKE_ARGS} + -DCMAKE_C_FLAGS=${AWS_C_FLAGS} + -DCMAKE_CXX_FLAGS=${AWS_CXX_FLAGS} -DCPP_STANDARD=${CMAKE_CXX_STANDARD} -DCMAKE_INSTALL_PREFIX=${AWSSDK_PREFIX} -DCMAKE_PREFIX_PATH=${AWSSDK_PREFIX} diff --git a/cpp/cmake_modules/UseCython.cmake b/cpp/cmake_modules/UseCython.cmake index e15ac59490c6e..7d88daa4fade9 100644 --- a/cpp/cmake_modules/UseCython.cmake +++ b/cpp/cmake_modules/UseCython.cmake @@ -184,4 +184,9 @@ 
function(cython_add_module _name pyx_target_name generated_files) add_dependencies(${_name} ${pyx_target_name}) endfunction() +execute_process(COMMAND ${PYTHON_EXECUTABLE} -c "from Cython.Compiler.Version import version; print(version)" + OUTPUT_VARIABLE CYTHON_VERSION_OUTPUT + OUTPUT_STRIP_TRAILING_WHITESPACE) +set(CYTHON_VERSION "${CYTHON_VERSION_OUTPUT}") + include(CMakeParseArguments) diff --git a/cpp/examples/minimal_build/CMakeLists.txt b/cpp/examples/minimal_build/CMakeLists.txt index b4a7cde938c87..95dad34221add 100644 --- a/cpp/examples/minimal_build/CMakeLists.txt +++ b/cpp/examples/minimal_build/CMakeLists.txt @@ -30,7 +30,7 @@ endif() # We require a C++17 compliant compiler set(CMAKE_CXX_STANDARD_REQUIRED ON) -if(NOT DEFINED CMAKE_BUILD_TYPE) +if(NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE Release) endif() diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt index 6dc8358f502f5..e77a02d0c0800 100644 --- a/cpp/src/arrow/CMakeLists.txt +++ b/cpp/src/arrow/CMakeLists.txt @@ -373,7 +373,11 @@ set(ARROW_SRCS config.cc datum.cc device.cc + device_allocation_type_set.cc extension_type.cc + extension/bool8.cc + extension/json.cc + extension/uuid.cc pretty_print.cc record_batch.cc result.cc @@ -412,6 +416,7 @@ arrow_add_object_library(ARROW_ARRAY array/concatenate.cc array/data.cc array/diff.cc + array/statistics.cc array/util.cc array/validate.cc) @@ -640,9 +645,13 @@ else() endif() set(ARROW_TESTING_SHARED_LINK_LIBS arrow_shared ${ARROW_GTEST_GTEST}) -set(ARROW_TESTING_SHARED_PRIVATE_LINK_LIBS arrow::flatbuffers RapidJSON) -set(ARROW_TESTING_STATIC_LINK_LIBS arrow::flatbuffers RapidJSON arrow_static - ${ARROW_GTEST_GTEST}) +set(ARROW_TESTING_SHARED_PRIVATE_LINK_LIBS arrow::flatbuffers RapidJSON Boost::process) +set(ARROW_TESTING_STATIC_LINK_LIBS + arrow::flatbuffers + RapidJSON + Boost::process + arrow_static + ${ARROW_GTEST_GTEST}) set(ARROW_TESTING_SHARED_INSTALL_INTERFACE_LIBS Arrow::arrow_shared) set(ARROW_TESTING_STATIC_INSTALL_INTERFACE_LIBS Arrow::arrow_static) # that depend on gtest @@ -663,9 +672,10 @@ set(ARROW_TESTING_SRCS io/test_common.cc ipc/test_common.cc testing/fixed_width_test_util.cc + testing/generator.cc testing/gtest_util.cc + testing/process.cc testing/random.cc - testing/generator.cc testing/util.cc) # @@ -722,7 +732,6 @@ set(ARROW_COMPUTE_SRCS compute/ordering.cc compute/registry.cc compute/kernels/codegen_internal.cc - compute/kernels/row_encoder.cc compute/kernels/ree_util_internal.cc compute/kernels/scalar_cast_boolean.cc compute/kernels/scalar_cast_dictionary.cc @@ -741,6 +750,7 @@ set(ARROW_COMPUTE_SRCS compute/row/encode_internal.cc compute/row/compare_internal.cc compute/row/grouper.cc + compute/row/row_encoder_internal.cc compute/row/row_internal.cc compute/util.cc compute/util_internal.cc) @@ -906,6 +916,7 @@ endif() if(ARROW_JSON) arrow_add_object_library(ARROW_JSON extension/fixed_shape_tensor.cc + extension/opaque.cc json/options.cc json/chunked_builder.cc json/chunker.cc @@ -1168,6 +1179,7 @@ add_arrow_test(array_test array/array_struct_test.cc array/array_union_test.cc array/array_view_test.cc + array/statistics_test.cc PRECOMPILED_HEADERS "$<$:arrow/testing/pch.h>") @@ -1221,6 +1233,7 @@ add_subdirectory(testing) add_subdirectory(array) add_subdirectory(c) add_subdirectory(compute) +add_subdirectory(extension) add_subdirectory(io) add_subdirectory(tensor) add_subdirectory(util) @@ -1263,7 +1276,6 @@ endif() if(ARROW_JSON) add_subdirectory(json) - add_subdirectory(extension) endif() if(ARROW_ORC) diff --git 
a/cpp/src/arrow/acero/aggregate_benchmark.cc b/cpp/src/arrow/acero/aggregate_benchmark.cc index 854862e3e48ca..9c90b63904eb3 100644 --- a/cpp/src/arrow/acero/aggregate_benchmark.cc +++ b/cpp/src/arrow/acero/aggregate_benchmark.cc @@ -24,6 +24,7 @@ #include "arrow/array/array_primitive.h" #include "arrow/compute/api.h" #include "arrow/table.h" +#include "arrow/testing/generator.h" #include "arrow/testing/gtest_util.h" #include "arrow/testing/random.h" #include "arrow/util/benchmark_util.h" @@ -165,11 +166,11 @@ struct SumSentinelUnrolled : public Summer { static void Sum(const ArrayType& array, SumState* state) { SumState local; -#define SUM_NOT_NULL(ITEM) \ - do { \ - local.total += values[i + ITEM] * Traits::NotNull(values[i + ITEM]); \ - local.valid_count++; \ - } while (0) +# define SUM_NOT_NULL(ITEM) \ + do { \ + local.total += values[i + ITEM] * Traits::NotNull(values[i + ITEM]); \ + local.valid_count++; \ + } while (0) const auto values = array.raw_values(); const auto length = array.length(); @@ -185,7 +186,7 @@ struct SumSentinelUnrolled : public Summer { SUM_NOT_NULL(7); } -#undef SUM_NOT_NULL +# undef SUM_NOT_NULL for (int64_t i = length_rounded * 8; i < length; ++i) { local.total += values[i] * Traits::NotNull(values[i]); @@ -256,7 +257,7 @@ struct SumBitmapVectorizeUnroll : public Summer { for (int64_t i = 0; i < length_rounded; i += 8) { const uint8_t valid_byte = bitmap[i / 8]; -#define SUM_SHIFT(ITEM) (values[i + ITEM] * ((valid_byte >> ITEM) & 1)) +# define SUM_SHIFT(ITEM) (values[i + ITEM] * ((valid_byte >> ITEM) & 1)) if (valid_byte < 0xFF) { // Some nulls @@ -277,7 +278,7 @@ struct SumBitmapVectorizeUnroll : public Summer { } } -#undef SUM_SHIFT +# undef SUM_SHIFT for (int64_t i = length_rounded; i < length; ++i) { if (bit_util::GetBit(bitmap, i)) { @@ -325,7 +326,8 @@ BENCHMARK_TEMPLATE(ReferenceSum, SumBitmapVectorizeUnroll) std::shared_ptr RecordBatchFromArrays( const std::vector>& arguments, - const std::vector>& keys) { + const std::vector>& keys, + const std::vector>& segment_keys) { std::vector> fields; std::vector> all_arrays; int64_t length = -1; @@ -347,37 +349,56 @@ std::shared_ptr RecordBatchFromArrays( fields.push_back(field("key" + ToChars(key_idx), key->type())); all_arrays.push_back(key); } + for (std::size_t segment_key_idx = 0; segment_key_idx < segment_keys.size(); + segment_key_idx++) { + const auto& segment_key = segment_keys[segment_key_idx]; + DCHECK_EQ(segment_key->length(), length); + fields.push_back( + field("segment_key" + ToChars(segment_key_idx), segment_key->type())); + all_arrays.push_back(segment_key); + } return RecordBatch::Make(schema(std::move(fields)), length, std::move(all_arrays)); } Result> BatchGroupBy( std::shared_ptr batch, std::vector aggregates, - std::vector keys, bool use_threads = false, - MemoryPool* memory_pool = default_memory_pool()) { + std::vector keys, std::vector segment_keys, + bool use_threads = false, MemoryPool* memory_pool = default_memory_pool()) { ARROW_ASSIGN_OR_RAISE(std::shared_ptr table, Table::FromRecordBatches({std::move(batch)})); Declaration plan = Declaration::Sequence( {{"table_source", TableSourceNodeOptions(std::move(table))}, - {"aggregate", AggregateNodeOptions(std::move(aggregates), std::move(keys))}}); + {"aggregate", AggregateNodeOptions(std::move(aggregates), std::move(keys), + std::move(segment_keys))}}); return DeclarationToTable(std::move(plan), use_threads, memory_pool); } -static void BenchmarkGroupBy(benchmark::State& state, std::vector aggregates, - const std::vector>& arguments, - 
const std::vector>& keys) { - std::shared_ptr batch = RecordBatchFromArrays(arguments, keys); +static void BenchmarkAggregate( + benchmark::State& state, std::vector aggregates, + const std::vector>& arguments, + const std::vector>& keys, + const std::vector>& segment_keys = {}) { + std::shared_ptr batch = + RecordBatchFromArrays(arguments, keys, segment_keys); std::vector key_refs; for (std::size_t key_idx = 0; key_idx < keys.size(); key_idx++) { key_refs.emplace_back(static_cast(key_idx + arguments.size())); } + std::vector segment_key_refs; + for (std::size_t segment_key_idx = 0; segment_key_idx < segment_keys.size(); + segment_key_idx++) { + segment_key_refs.emplace_back( + static_cast(segment_key_idx + arguments.size() + keys.size())); + } for (std::size_t arg_idx = 0; arg_idx < arguments.size(); arg_idx++) { aggregates[arg_idx].target = {FieldRef(static_cast(arg_idx))}; } int64_t total_bytes = TotalBufferSize(*batch); for (auto _ : state) { - ABORT_NOT_OK(BatchGroupBy(batch, aggregates, key_refs)); + ABORT_NOT_OK(BatchGroupBy(batch, aggregates, key_refs, segment_key_refs)); } state.SetBytesProcessed(total_bytes * state.iterations()); + state.SetItemsProcessed(batch->num_rows() * state.iterations()); } #define GROUP_BY_BENCHMARK(Name, Impl) \ @@ -404,7 +425,7 @@ GROUP_BY_BENCHMARK(SumDoublesGroupedByTinyStringSet, [&] { /*min_length=*/3, /*max_length=*/32); - BenchmarkGroupBy(state, {{"hash_sum", ""}}, {summand}, {key}); + BenchmarkAggregate(state, {{"hash_sum", ""}}, {summand}, {key}); }); GROUP_BY_BENCHMARK(SumDoublesGroupedBySmallStringSet, [&] { @@ -419,7 +440,7 @@ GROUP_BY_BENCHMARK(SumDoublesGroupedBySmallStringSet, [&] { /*min_length=*/3, /*max_length=*/32); - BenchmarkGroupBy(state, {{"hash_sum", ""}}, {summand}, {key}); + BenchmarkAggregate(state, {{"hash_sum", ""}}, {summand}, {key}); }); GROUP_BY_BENCHMARK(SumDoublesGroupedByMediumStringSet, [&] { @@ -434,7 +455,7 @@ GROUP_BY_BENCHMARK(SumDoublesGroupedByMediumStringSet, [&] { /*min_length=*/3, /*max_length=*/32); - BenchmarkGroupBy(state, {{"hash_sum", ""}}, {summand}, {key}); + BenchmarkAggregate(state, {{"hash_sum", ""}}, {summand}, {key}); }); GROUP_BY_BENCHMARK(SumDoublesGroupedByTinyIntegerSet, [&] { @@ -448,7 +469,7 @@ GROUP_BY_BENCHMARK(SumDoublesGroupedByTinyIntegerSet, [&] { /*min=*/0, /*max=*/15); - BenchmarkGroupBy(state, {{"hash_sum", ""}}, {summand}, {key}); + BenchmarkAggregate(state, {{"hash_sum", ""}}, {summand}, {key}); }); GROUP_BY_BENCHMARK(SumDoublesGroupedBySmallIntegerSet, [&] { @@ -462,7 +483,7 @@ GROUP_BY_BENCHMARK(SumDoublesGroupedBySmallIntegerSet, [&] { /*min=*/0, /*max=*/255); - BenchmarkGroupBy(state, {{"hash_sum", ""}}, {summand}, {key}); + BenchmarkAggregate(state, {{"hash_sum", ""}}, {summand}, {key}); }); GROUP_BY_BENCHMARK(SumDoublesGroupedByMediumIntegerSet, [&] { @@ -476,7 +497,7 @@ GROUP_BY_BENCHMARK(SumDoublesGroupedByMediumIntegerSet, [&] { /*min=*/0, /*max=*/4095); - BenchmarkGroupBy(state, {{"hash_sum", ""}}, {summand}, {key}); + BenchmarkAggregate(state, {{"hash_sum", ""}}, {summand}, {key}); }); GROUP_BY_BENCHMARK(SumDoublesGroupedByTinyIntStringPairSet, [&] { @@ -494,7 +515,7 @@ GROUP_BY_BENCHMARK(SumDoublesGroupedByTinyIntStringPairSet, [&] { /*min_length=*/3, /*max_length=*/32); - BenchmarkGroupBy(state, {{"hash_sum", ""}}, {summand}, {int_key, str_key}); + BenchmarkAggregate(state, {{"hash_sum", ""}}, {summand}, {int_key, str_key}); }); GROUP_BY_BENCHMARK(SumDoublesGroupedBySmallIntStringPairSet, [&] { @@ -512,7 +533,7 @@ 
GROUP_BY_BENCHMARK(SumDoublesGroupedBySmallIntStringPairSet, [&] { /*min_length=*/3, /*max_length=*/32); - BenchmarkGroupBy(state, {{"hash_sum", ""}}, {summand}, {int_key, str_key}); + BenchmarkAggregate(state, {{"hash_sum", ""}}, {summand}, {int_key, str_key}); }); GROUP_BY_BENCHMARK(SumDoublesGroupedByMediumIntStringPairSet, [&] { @@ -530,7 +551,7 @@ GROUP_BY_BENCHMARK(SumDoublesGroupedByMediumIntStringPairSet, [&] { /*min_length=*/3, /*max_length=*/32); - BenchmarkGroupBy(state, {{"hash_sum", ""}}, {summand}, {int_key, str_key}); + BenchmarkAggregate(state, {{"hash_sum", ""}}, {summand}, {int_key, str_key}); }); // Grouped MinMax @@ -543,7 +564,7 @@ GROUP_BY_BENCHMARK(MinMaxDoublesGroupedByMediumInt, [&] { /*nan_probability=*/args.null_proportion / 10); auto int_key = rng.Int64(args.size, /*min=*/0, /*max=*/63); - BenchmarkGroupBy(state, {{"hash_min_max", ""}}, {input}, {int_key}); + BenchmarkAggregate(state, {{"hash_min_max", ""}}, {input}, {int_key}); }); GROUP_BY_BENCHMARK(MinMaxShortStringsGroupedByMediumInt, [&] { @@ -553,7 +574,7 @@ GROUP_BY_BENCHMARK(MinMaxShortStringsGroupedByMediumInt, [&] { /*null_probability=*/args.null_proportion); auto int_key = rng.Int64(args.size, /*min=*/0, /*max=*/63); - BenchmarkGroupBy(state, {{"hash_min_max", ""}}, {input}, {int_key}); + BenchmarkAggregate(state, {{"hash_min_max", ""}}, {input}, {int_key}); }); GROUP_BY_BENCHMARK(MinMaxLongStringsGroupedByMediumInt, [&] { @@ -563,7 +584,7 @@ GROUP_BY_BENCHMARK(MinMaxLongStringsGroupedByMediumInt, [&] { /*null_probability=*/args.null_proportion); auto int_key = rng.Int64(args.size, /*min=*/0, /*max=*/63); - BenchmarkGroupBy(state, {{"hash_min_max", ""}}, {input}, {int_key}); + BenchmarkAggregate(state, {{"hash_min_max", ""}}, {input}, {int_key}); }); // @@ -866,5 +887,61 @@ BENCHMARK(TDigestKernelDoubleMedian)->Apply(QuantileKernelArgs); BENCHMARK(TDigestKernelDoubleDeciles)->Apply(QuantileKernelArgs); BENCHMARK(TDigestKernelDoubleCentiles)->Apply(QuantileKernelArgs); +// +// Segmented Aggregate +// + +static void BenchmarkSegmentedAggregate( + benchmark::State& state, int64_t num_rows, std::vector aggregates, + const std::vector>& arguments, + const std::vector>& keys, int64_t num_segment_keys, + int64_t num_segments) { + ASSERT_GT(num_segments, 0); + + auto rng = random::RandomArrayGenerator(42); + auto segment_key = rng.Int64(num_rows, /*min=*/0, /*max=*/num_segments - 1); + int64_t* values = segment_key->data()->GetMutableValues(1); + std::sort(values, values + num_rows); + // num_segment_keys copies of the segment key. + ArrayVector segment_keys(num_segment_keys, segment_key); + + BenchmarkAggregate(state, std::move(aggregates), arguments, keys, segment_keys); +} + +template +static void CountScalarSegmentedByInts(benchmark::State& state, Args&&...) { + constexpr int64_t num_rows = 32 * 1024; + + // A trivial column to count from. + auto arg = ConstantArrayGenerator::Zeroes(num_rows, int32()); + + BenchmarkSegmentedAggregate(state, num_rows, {{"count", ""}}, {arg}, /*keys=*/{}, + state.range(0), state.range(1)); +} +BENCHMARK(CountScalarSegmentedByInts) + ->ArgNames({"SegmentKeys", "Segments"}) + ->ArgsProduct({{0, 1, 2}, benchmark::CreateRange(1, 256, 8)}); + +template +static void CountGroupByIntsSegmentedByInts(benchmark::State& state, Args&&...) { + constexpr int64_t num_rows = 32 * 1024; + + // A trivial column to count from. 
+ auto arg = ConstantArrayGenerator::Zeroes(num_rows, int32()); + + auto rng = random::RandomArrayGenerator(42); + int64_t num_keys = state.range(0); + ArrayVector keys(num_keys); + for (auto& key : keys) { + key = rng.Int64(num_rows, /*min=*/0, /*max=*/64); + } + + BenchmarkSegmentedAggregate(state, num_rows, {{"hash_count", ""}}, {arg}, keys, + state.range(1), state.range(2)); +} +BENCHMARK(CountGroupByIntsSegmentedByInts) + ->ArgNames({"Keys", "SegmentKeys", "Segments"}) + ->ArgsProduct({{1, 2}, {0, 1, 2}, benchmark::CreateRange(1, 256, 8)}); + } // namespace acero } // namespace arrow diff --git a/cpp/src/arrow/acero/aggregate_internal.h b/cpp/src/arrow/acero/aggregate_internal.h index 5730d99f93f88..7cdc424cbb76b 100644 --- a/cpp/src/arrow/acero/aggregate_internal.h +++ b/cpp/src/arrow/acero/aggregate_internal.h @@ -131,17 +131,14 @@ void AggregatesToString(std::stringstream* ss, const Schema& input_schema, template Status HandleSegments(RowSegmenter* segmenter, const ExecBatch& batch, const std::vector& ids, const BatchHandler& handle_batch) { - int64_t offset = 0; ARROW_ASSIGN_OR_RAISE(auto segment_exec_batch, batch.SelectValues(ids)); ExecSpan segment_batch(segment_exec_batch); - while (true) { - ARROW_ASSIGN_OR_RAISE(compute::Segment segment, - segmenter->GetNextSegment(segment_batch, offset)); - if (segment.offset >= segment_batch.length) break; // condition of no-next-segment + ARROW_ASSIGN_OR_RAISE(auto segments, segmenter->GetSegments(segment_batch)); + for (const auto& segment : segments) { ARROW_RETURN_NOT_OK(handle_batch(batch, segment)); - offset = segment.offset + segment.length; } + return Status::OK(); } diff --git a/cpp/src/arrow/acero/aggregate_node_test.cc b/cpp/src/arrow/acero/aggregate_node_test.cc index d398fb24b73d5..c623271db9fb4 100644 --- a/cpp/src/arrow/acero/aggregate_node_test.cc +++ b/cpp/src/arrow/acero/aggregate_node_test.cc @@ -210,5 +210,57 @@ TEST(GroupByNode, NoSkipNulls) { AssertExecBatchesEqualIgnoringOrder(out_schema, {expected_batch}, out_batches.batches); } +TEST(ScalarAggregateNode, AnyAll) { + // GH-43768: boolean_any and boolean_all with constant input should work well + // when min_count != 0. 
+ std::shared_ptr in_schema = schema({field("not_used", int32())}); + std::shared_ptr out_schema = schema({field("agg_out", boolean())}); + struct AnyAllCase { + std::string batches_json; + Expression literal; + std::string expected_json; + bool skip_nulls = false; + uint32_t min_count = 2; + }; + std::vector cases{ + {"[[42], [42], [42], [42]]", literal(true), "[[true]]"}, + {"[[42], [42], [42], [42]]", literal(false), "[[false]]"}, + {"[[42], [42], [42], [42]]", literal(BooleanScalar{}), "[[null]]"}, + {"[[42]]", literal(true), "[[null]]"}, + {"[[42], [42], [42]]", literal(true), "[[true]]"}, + {"[[42], [42], [42]]", literal(true), "[[null]]", /*skip_nulls=*/false, + /*min_count=*/4}, + {"[[42], [42], [42], [42]]", literal(BooleanScalar{}), "[[null]]", + /*skip_nulls=*/true}, + }; + for (const AnyAllCase& any_all_case : cases) { + for (auto func_name : {"any", "all"}) { + std::vector batches{ + ExecBatchFromJSON({int32()}, any_all_case.batches_json)}; + std::vector aggregates = { + Aggregate(func_name, + std::make_shared( + /*skip_nulls=*/any_all_case.skip_nulls, + /*min_count=*/any_all_case.min_count), + FieldRef("literal"))}; + + // And a projection to make the input including a Scalar Boolean + Declaration plan = Declaration::Sequence( + {{"exec_batch_source", ExecBatchSourceNodeOptions(in_schema, batches)}, + {"project", ProjectNodeOptions({any_all_case.literal}, {"literal"})}, + {"aggregate", AggregateNodeOptions(aggregates)}}); + + ASSERT_OK_AND_ASSIGN(BatchesWithCommonSchema out_batches, + DeclarationToExecBatches(plan)); + + ExecBatch expected_batch = + ExecBatchFromJSON({boolean()}, any_all_case.expected_json); + + AssertExecBatchesEqualIgnoringOrder(out_schema, {expected_batch}, + out_batches.batches); + } + } +} + } // namespace acero } // namespace arrow diff --git a/cpp/src/arrow/acero/asof_join_node.cc b/cpp/src/arrow/acero/asof_join_node.cc index 848cbdf7506ad..c4f11d01f3d5c 100644 --- a/cpp/src/arrow/acero/asof_join_node.cc +++ b/cpp/src/arrow/acero/asof_join_node.cc @@ -32,9 +32,9 @@ #include "arrow/acero/exec_plan.h" #include "arrow/acero/options.h" -#include "arrow/acero/unmaterialized_table.h" +#include "arrow/acero/unmaterialized_table_internal.h" #ifndef NDEBUG -#include "arrow/acero/options_internal.h" +# include "arrow/acero/options_internal.h" #endif #include "arrow/acero/query_context.h" #include "arrow/acero/schema_util.h" @@ -42,7 +42,7 @@ #include "arrow/array/builder_binary.h" #include "arrow/array/builder_primitive.h" #ifndef NDEBUG -#include "arrow/compute/function_internal.h" +# include "arrow/compute/function_internal.h" #endif #include "arrow/acero/time_series_util.h" #include "arrow/compute/key_hash_internal.h" @@ -207,16 +207,16 @@ class DebugSync { std::unique_lock debug_lock_; }; -#define DEBUG_SYNC(node, ...) DebugSync(node).insert(__VA_ARGS__) -#define DEBUG_MANIP(manip) \ - DebugSync::Manip([](DebugSync& d) -> DebugSync& { return d << manip; }) -#define NDEBUG_EXPLICIT -#define DEBUG_ADD(ndebug, ...) ndebug, __VA_ARGS__ +# define DEBUG_SYNC(node, ...) DebugSync(node).insert(__VA_ARGS__) +# define DEBUG_MANIP(manip) \ + DebugSync::Manip([](DebugSync& d) -> DebugSync& { return d << manip; }) +# define NDEBUG_EXPLICIT +# define DEBUG_ADD(ndebug, ...) ndebug, __VA_ARGS__ #else -#define DEBUG_SYNC(...) -#define DEBUG_MANIP(...) -#define NDEBUG_EXPLICIT explicit -#define DEBUG_ADD(ndebug, ...) ndebug +# define DEBUG_SYNC(...) +# define DEBUG_MANIP(...) +# define NDEBUG_EXPLICIT explicit +# define DEBUG_ADD(ndebug, ...) 
ndebug #endif struct MemoStore { diff --git a/cpp/src/arrow/acero/asof_join_node_test.cc b/cpp/src/arrow/acero/asof_join_node_test.cc index 051e280a4c53c..5d3e9fba08bbf 100644 --- a/cpp/src/arrow/acero/asof_join_node_test.cc +++ b/cpp/src/arrow/acero/asof_join_node_test.cc @@ -26,13 +26,13 @@ #include "arrow/acero/exec_plan.h" #include "arrow/testing/future_util.h" #ifndef NDEBUG -#include +# include #endif #include #include "arrow/acero/options.h" #ifndef NDEBUG -#include "arrow/acero/options_internal.h" +# include "arrow/acero/options_internal.h" #endif #include "arrow/acero/map_node.h" #include "arrow/acero/query_context.h" @@ -41,8 +41,8 @@ #include "arrow/acero/util.h" #include "arrow/api.h" #include "arrow/compute/api_scalar.h" -#include "arrow/compute/kernels/row_encoder_internal.h" #include "arrow/compute/kernels/test_util.h" +#include "arrow/compute/row/row_encoder_internal.h" #include "arrow/testing/gtest_util.h" #include "arrow/testing/matchers.h" #include "arrow/testing/random.h" diff --git a/cpp/src/arrow/acero/bloom_filter.h b/cpp/src/arrow/acero/bloom_filter.h index 50d07bfd948e0..530beaea64827 100644 --- a/cpp/src/arrow/acero/bloom_filter.h +++ b/cpp/src/arrow/acero/bloom_filter.h @@ -18,7 +18,7 @@ #pragma once #if defined(ARROW_HAVE_RUNTIME_AVX2) -#include +# include #endif #include diff --git a/cpp/src/arrow/acero/bloom_filter_test.cc b/cpp/src/arrow/acero/bloom_filter_test.cc index a2d6e9575a1aa..30cafd120caea 100644 --- a/cpp/src/arrow/acero/bloom_filter_test.cc +++ b/cpp/src/arrow/acero/bloom_filter_test.cc @@ -503,9 +503,9 @@ TEST(BloomFilter, Scaling) { num_build.push_back(4000000); std::vector strategies; -#ifdef ARROW_ENABLE_THREADING +# ifdef ARROW_ENABLE_THREADING strategies.push_back(BloomFilterBuildStrategy::PARALLEL); -#endif +# endif strategies.push_back(BloomFilterBuildStrategy::SINGLE_THREADED); for (const auto hardware_flags : HardwareFlagsForTesting()) { diff --git a/cpp/src/arrow/acero/groupby_aggregate_node.cc b/cpp/src/arrow/acero/groupby_aggregate_node.cc index 723c8b7377e13..06b034ab2d459 100644 --- a/cpp/src/arrow/acero/groupby_aggregate_node.cc +++ b/cpp/src/arrow/acero/groupby_aggregate_node.cc @@ -369,13 +369,14 @@ Status GroupByNode::InputReceived(ExecNode* input, ExecBatch batch) { DCHECK_EQ(input, inputs_[0]); auto handler = [this](const ExecBatch& full_batch, const Segment& segment) { - if (!segment.extends && segment.offset == 0) RETURN_NOT_OK(OutputResult(false)); + if (!segment.extends && segment.offset == 0) + RETURN_NOT_OK(OutputResult(/*is_last=*/false)); auto exec_batch = full_batch.Slice(segment.offset, segment.length); auto batch = ExecSpan(exec_batch); RETURN_NOT_OK(Consume(batch)); RETURN_NOT_OK( ExtractSegmenterValues(&segmenter_values_, exec_batch, segment_key_field_ids_)); - if (!segment.is_open) RETURN_NOT_OK(OutputResult(false)); + if (!segment.is_open) RETURN_NOT_OK(OutputResult(/*is_last=*/false)); return Status::OK(); }; ARROW_RETURN_NOT_OK( diff --git a/cpp/src/arrow/acero/hash_aggregate_test.cc b/cpp/src/arrow/acero/hash_aggregate_test.cc index 743cb20d1960d..1e2975afc91b3 100644 --- a/cpp/src/arrow/acero/hash_aggregate_test.cc +++ b/cpp/src/arrow/acero/hash_aggregate_test.cc @@ -585,19 +585,12 @@ void TestGroupClassSupportedKeys( void TestSegments(std::unique_ptr& segmenter, const ExecSpan& batch, std::vector expected_segments) { - int64_t offset = 0, segment_num = 0; - for (auto expected_segment : expected_segments) { - SCOPED_TRACE("segment #" + ToChars(segment_num++)); - ASSERT_OK_AND_ASSIGN(auto segment, 
segmenter->GetNextSegment(batch, offset)); - ASSERT_EQ(expected_segment, segment); - offset = segment.offset + segment.length; + ASSERT_OK_AND_ASSIGN(auto actual_segments, segmenter->GetSegments(batch)); + ASSERT_EQ(actual_segments.size(), expected_segments.size()); + for (size_t i = 0; i < actual_segments.size(); ++i) { + SCOPED_TRACE("segment #" + ToChars(i)); + ASSERT_EQ(actual_segments[i], expected_segments[i]); } - // Assert next is the last (empty) segment. - ASSERT_OK_AND_ASSIGN(auto segment, segmenter->GetNextSegment(batch, offset)); - ASSERT_GE(segment.offset, batch.length); - ASSERT_EQ(segment.length, 0); - ASSERT_TRUE(segment.is_open); - ASSERT_TRUE(segment.extends); } Result> MakeGrouper(const std::vector& key_types) { @@ -629,91 +622,68 @@ TEST(RowSegmenter, Basics) { auto batch2 = ExecBatchFromJSON(types2, "[[1, 1], [1, 2], [2, 2]]"); auto batch1 = ExecBatchFromJSON(types1, "[[1], [1], [2]]"); ExecBatch batch0({}, 3); - { - SCOPED_TRACE("offset"); - ASSERT_OK_AND_ASSIGN(auto segmenter, MakeRowSegmenter(types0)); - ExecSpan span0(batch0); - for (int64_t offset : {-1, 4}) { - EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid, - HasSubstr("invalid grouping segmenter offset"), - segmenter->GetNextSegment(span0, offset)); - } - } { SCOPED_TRACE("types0 segmenting of batch2"); ASSERT_OK_AND_ASSIGN(auto segmenter, MakeRowSegmenter(types0)); ExecSpan span2(batch2); EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid, HasSubstr("expected batch size 0 "), - segmenter->GetNextSegment(span2, 0)); + segmenter->GetSegments(span2)); ExecSpan span0(batch0); - TestSegments(segmenter, span0, {{0, 3, true, true}, {3, 0, true, true}}); + TestSegments(segmenter, span0, {{0, 3, true, true}}); } { SCOPED_TRACE("bad_types1 segmenting of batch1"); ASSERT_OK_AND_ASSIGN(auto segmenter, MakeRowSegmenter(bad_types1)); ExecSpan span1(batch1); EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid, HasSubstr("expected batch value 0 of type "), - segmenter->GetNextSegment(span1, 0)); + segmenter->GetSegments(span1)); } { SCOPED_TRACE("types1 segmenting of batch2"); ASSERT_OK_AND_ASSIGN(auto segmenter, MakeRowSegmenter(types1)); ExecSpan span2(batch2); EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid, HasSubstr("expected batch size 1 "), - segmenter->GetNextSegment(span2, 0)); + segmenter->GetSegments(span2)); ExecSpan span1(batch1); - TestSegments(segmenter, span1, - {{0, 2, false, true}, {2, 1, true, false}, {3, 0, true, true}}); + TestSegments(segmenter, span1, {{0, 2, false, true}, {2, 1, true, false}}); } { SCOPED_TRACE("bad_types2 segmenting of batch2"); ASSERT_OK_AND_ASSIGN(auto segmenter, MakeRowSegmenter(bad_types2)); ExecSpan span2(batch2); EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid, HasSubstr("expected batch value 1 of type "), - segmenter->GetNextSegment(span2, 0)); + segmenter->GetSegments(span2)); } { SCOPED_TRACE("types2 segmenting of batch1"); ASSERT_OK_AND_ASSIGN(auto segmenter, MakeRowSegmenter(types2)); ExecSpan span1(batch1); EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid, HasSubstr("expected batch size 2 "), - segmenter->GetNextSegment(span1, 0)); + segmenter->GetSegments(span1)); ExecSpan span2(batch2); TestSegments(segmenter, span2, - {{0, 1, false, true}, - {1, 1, false, false}, - {2, 1, true, false}, - {3, 0, true, true}}); + {{0, 1, false, true}, {1, 1, false, false}, {2, 1, true, false}}); } } TEST(RowSegmenter, NonOrdered) { - { - std::vector types = {int32()}; - auto batch = ExecBatchFromJSON(types, "[[1], [1], [2], [1], [2]]"); + for (int num_keys = 1; num_keys <= 2; ++num_keys) { + SCOPED_TRACE("non-ordered " + ToChars(num_keys) + " 
int32(s)"); + std::vector types(num_keys, int32()); + std::vector values(num_keys, ArrayFromJSON(int32(), "[1, 1, 2, 1, 2]")); + ExecBatch batch(std::move(values), 5); ASSERT_OK_AND_ASSIGN(auto segmenter, MakeRowSegmenter(types)); TestSegments(segmenter, ExecSpan(batch), {{0, 2, false, true}, {2, 1, false, false}, {3, 1, false, false}, - {4, 1, true, false}, - {5, 0, true, true}}); - } - { - std::vector types = {int32(), int32()}; - auto batch = ExecBatchFromJSON(types, "[[1, 1], [1, 1], [2, 2], [1, 2], [2, 2]]"); - ASSERT_OK_AND_ASSIGN(auto segmenter, MakeRowSegmenter(types)); - TestSegments(segmenter, ExecSpan(batch), - {{0, 2, false, true}, - {2, 1, false, false}, - {3, 1, false, false}, - {4, 1, true, false}, - {5, 0, true, true}}); + {4, 1, true, false}}); } } TEST(RowSegmenter, EmptyBatches) { { + SCOPED_TRACE("empty batches {int32}"); std::vector types = {int32()}; std::vector batches = { ExecBatchFromJSON(types, "[]"), ExecBatchFromJSON(types, "[]"), @@ -732,6 +702,7 @@ TEST(RowSegmenter, EmptyBatches) { TestSegments(segmenter, ExecSpan(batches[7]), {}); } { + SCOPED_TRACE("empty batches {int32, int32}"); std::vector types = {int32(), int32()}; std::vector batches = { ExecBatchFromJSON(types, "[]"), @@ -756,25 +727,12 @@ TEST(RowSegmenter, EmptyBatches) { } TEST(RowSegmenter, MultipleSegments) { - { - std::vector types = {int32()}; - auto batch = - ExecBatchFromJSON(types, "[[1], [1], [2], [5], [3], [3], [5], [5], [4]]"); - ASSERT_OK_AND_ASSIGN(auto segmenter, MakeRowSegmenter(types)); - TestSegments(segmenter, ExecSpan(batch), - {{0, 2, false, true}, - {2, 1, false, false}, - {3, 1, false, false}, - {4, 2, false, false}, - {6, 2, false, false}, - {8, 1, true, false}, - {9, 0, true, true}}); - } - { - std::vector types = {int32(), int32()}; - auto batch = ExecBatchFromJSON( - types, - "[[1, 1], [1, 1], [2, 2], [5, 5], [3, 3], [3, 3], [5, 5], [5, 5], [4, 4]]"); + auto test_with_keys = [](int num_keys, const std::shared_ptr& key) { + SCOPED_TRACE("multiple segments " + ToChars(num_keys) + " " + + key->type()->ToString()); + std::vector types(num_keys, key->type()); + std::vector values(num_keys, key); + ExecBatch batch(std::move(values), key->length()); ASSERT_OK_AND_ASSIGN(auto segmenter, MakeRowSegmenter(types)); TestSegments(segmenter, ExecSpan(batch), {{0, 2, false, true}, @@ -782,13 +740,23 @@ TEST(RowSegmenter, MultipleSegments) { {3, 1, false, false}, {4, 2, false, false}, {6, 2, false, false}, - {8, 1, true, false}, - {9, 0, true, true}}); + {8, 1, true, false}}); + }; + for (int num_keys = 1; num_keys <= 2; ++num_keys) { + test_with_keys(num_keys, ArrayFromJSON(int32(), "[1, 1, 2, 5, 3, 3, 5, 5, 4]")); + test_with_keys( + num_keys, + ArrayFromJSON(fixed_size_binary(2), + R"(["aa", "aa", "bb", "ee", "cc", "cc", "ee", "ee", "dd"])")); + test_with_keys(num_keys, DictArrayFromJSON(dictionary(int8(), utf8()), + "[0, 0, 1, 4, 2, 2, 4, 4, 3]", + R"(["a", "b", "c", "d", "e"])")); } } TEST(RowSegmenter, MultipleSegmentsMultipleBatches) { { + SCOPED_TRACE("multiple segments multiple batches {int32}"); std::vector types = {int32()}; std::vector batches = { ExecBatchFromJSON(types, "[[1]]"), ExecBatchFromJSON(types, "[[1], [2]]"), @@ -806,6 +774,7 @@ TEST(RowSegmenter, MultipleSegmentsMultipleBatches) { TestSegments(segmenter, ExecSpan(batches[4]), {{0, 1, true, false}}); } { + SCOPED_TRACE("multiple segments multiple batches {int32, int32}"); std::vector types = {int32(), int32()}; std::vector batches = { ExecBatchFromJSON(types, "[[1, 1]]"), @@ -829,74 +798,217 @@ TEST(RowSegmenter, 
MultipleSegmentsMultipleBatches) { namespace { void TestRowSegmenterConstantBatch( - std::function shape_func, + const std::shared_ptr& type, + std::function shape_func, + std::function>(int64_t key)> value_func, std::function>(const std::vector&)> make_segmenter) { - constexpr size_t n = 3, repetitions = 3; - std::vector types = {int32(), int32(), int32()}; - std::vector shapes(n); - for (size_t i = 0; i < n; i++) shapes[i] = shape_func(i); - auto full_batch = ExecBatchFromJSON(types, shapes, "[[1, 1, 1], [1, 1, 1], [1, 1, 1]]"); - auto test_by_size = [&](size_t size) -> Status { - SCOPED_TRACE("constant-batch with " + ToChars(size) + " key(s)"); - std::vector values(full_batch.values.begin(), - full_batch.values.begin() + size); - ExecBatch batch(values, full_batch.length); - std::vector key_types(types.begin(), types.begin() + size); + constexpr int64_t n_keys = 3, n_rows = 3, repetitions = 3; + std::vector types(n_keys, type); + std::vector full_values(n_keys); + for (int64_t i = 0; i < n_keys; i++) { + auto shape = shape_func(i); + ASSERT_OK_AND_ASSIGN(auto scalar, value_func(i)); + if (shape == ArgShape::SCALAR) { + full_values[i] = std::move(scalar); + } else { + ASSERT_OK_AND_ASSIGN(full_values[i], MakeArrayFromScalar(*scalar, n_rows)); + } + } + auto test_with_keys = [&](int64_t keys) -> Status { + SCOPED_TRACE("constant-batch with " + ToChars(keys) + " key(s)"); + std::vector values(full_values.begin(), full_values.begin() + keys); + ExecBatch batch(values, n_rows); + std::vector key_types(types.begin(), types.begin() + keys); ARROW_ASSIGN_OR_RAISE(auto segmenter, make_segmenter(key_types)); - for (size_t i = 0; i < repetitions; i++) { - TestSegments(segmenter, ExecSpan(batch), {{0, 3, true, true}, {3, 0, true, true}}); + for (int64_t i = 0; i < repetitions; i++) { + TestSegments(segmenter, ExecSpan(batch), {{0, n_rows, true, true}}); ARROW_RETURN_NOT_OK(segmenter->Reset()); } return Status::OK(); }; - for (size_t i = 0; i <= 3; i++) { - ASSERT_OK(test_by_size(i)); + for (int64_t i = 0; i <= n_keys; i++) { + ASSERT_OK(test_with_keys(i)); } } } // namespace TEST(RowSegmenter, ConstantArrayBatch) { - TestRowSegmenterConstantBatch([](size_t i) { return ArgShape::ARRAY; }, - MakeRowSegmenter); + TestRowSegmenterConstantBatch( + int32(), [](int64_t key) { return ArgShape::ARRAY; }, + [](int64_t key) { return MakeScalar(1); }, MakeRowSegmenter); } TEST(RowSegmenter, ConstantScalarBatch) { - TestRowSegmenterConstantBatch([](size_t i) { return ArgShape::SCALAR; }, - MakeRowSegmenter); + TestRowSegmenterConstantBatch( + int32(), [](int64_t key) { return ArgShape::SCALAR; }, + [](int64_t key) { return MakeScalar(1); }, MakeRowSegmenter); } TEST(RowSegmenter, ConstantMixedBatch) { TestRowSegmenterConstantBatch( - [](size_t i) { return i % 2 == 0 ? ArgShape::SCALAR : ArgShape::ARRAY; }, - MakeRowSegmenter); + int32(), + [](int64_t key) { return key % 2 == 0 ? 
ArgShape::SCALAR : ArgShape::ARRAY; }, + [](int64_t key) { return MakeScalar(1); }, MakeRowSegmenter); } TEST(RowSegmenter, ConstantArrayBatchWithAnyKeysSegmenter) { - TestRowSegmenterConstantBatch([](size_t i) { return ArgShape::ARRAY; }, - MakeGenericSegmenter); + TestRowSegmenterConstantBatch( + int32(), [](int64_t key) { return ArgShape::ARRAY; }, + [](int64_t key) { return MakeScalar(1); }, MakeGenericSegmenter); } TEST(RowSegmenter, ConstantScalarBatchWithAnyKeysSegmenter) { - TestRowSegmenterConstantBatch([](size_t i) { return ArgShape::SCALAR; }, - MakeGenericSegmenter); + TestRowSegmenterConstantBatch( + int32(), [](int64_t key) { return ArgShape::SCALAR; }, + [](int64_t key) { return MakeScalar(1); }, MakeGenericSegmenter); } TEST(RowSegmenter, ConstantMixedBatchWithAnyKeysSegmenter) { TestRowSegmenterConstantBatch( - [](size_t i) { return i % 2 == 0 ? ArgShape::SCALAR : ArgShape::ARRAY; }, - MakeGenericSegmenter); + int32(), + [](int64_t key) { return key % 2 == 0 ? ArgShape::SCALAR : ArgShape::ARRAY; }, + [](int64_t key) { return MakeScalar(1); }, MakeGenericSegmenter); +} + +TEST(RowSegmenter, ConstantFixedSizeBinaryArrayBatch) { + constexpr int fsb = 8; + auto type = fixed_size_binary(fsb); + ASSERT_OK_AND_ASSIGN(auto value, MakeScalar(type, std::string(fsb, 'X'))); + TestRowSegmenterConstantBatch( + type, [](int64_t key) { return ArgShape::ARRAY; }, + [&](int64_t key) { return value; }, MakeRowSegmenter); +} + +TEST(RowSegmenter, ConstantFixedSizeBinaryScalarBatch) { + constexpr int fsb = 8; + auto type = fixed_size_binary(fsb); + ASSERT_OK_AND_ASSIGN(auto value, MakeScalar(type, std::string(fsb, 'X'))); + TestRowSegmenterConstantBatch( + fixed_size_binary(8), [](int64_t key) { return ArgShape::SCALAR; }, + [&](int64_t key) { return value; }, MakeRowSegmenter); +} + +TEST(RowSegmenter, ConstantFixedSizeBinaryMixedBatch) { + constexpr int fsb = 8; + auto type = fixed_size_binary(fsb); + ASSERT_OK_AND_ASSIGN(auto value, MakeScalar(type, std::string(fsb, 'X'))); + TestRowSegmenterConstantBatch( + fixed_size_binary(8), + [](int64_t key) { return key % 2 == 0 ? ArgShape::SCALAR : ArgShape::ARRAY; }, + [&](int64_t key) { return value; }, MakeRowSegmenter); +} + +TEST(RowSegmenter, ConstantFixedSizeBinaryArrayBatchWithAnyKeysSegmenter) { + constexpr int fsb = 8; + auto type = fixed_size_binary(fsb); + ASSERT_OK_AND_ASSIGN(auto value, MakeScalar(type, std::string(fsb, 'X'))); + TestRowSegmenterConstantBatch( + type, [](int64_t key) { return ArgShape::ARRAY; }, + [&](int64_t key) { return value; }, MakeGenericSegmenter); +} + +TEST(RowSegmenter, ConstantFixedSizeBinaryScalarBatchWithAnyKeysSegmenter) { + constexpr int fsb = 8; + auto type = fixed_size_binary(fsb); + ASSERT_OK_AND_ASSIGN(auto value, MakeScalar(type, std::string(fsb, 'X'))); + TestRowSegmenterConstantBatch( + fixed_size_binary(8), [](int64_t key) { return ArgShape::SCALAR; }, + [&](int64_t key) { return value; }, MakeGenericSegmenter); +} + +TEST(RowSegmenter, ConstantFixedSizeBinaryMixedBatchWithAnyKeysSegmenter) { + constexpr int fsb = 8; + auto type = fixed_size_binary(fsb); + ASSERT_OK_AND_ASSIGN(auto value, MakeScalar(type, std::string(fsb, 'X'))); + TestRowSegmenterConstantBatch( + fixed_size_binary(8), + [](int64_t key) { return key % 2 == 0 ? 
ArgShape::SCALAR : ArgShape::ARRAY; }, + [&](int64_t key) { return value; }, MakeGenericSegmenter); +} + +TEST(RowSegmenter, ConstantDictionaryArrayBatch) { + auto index_type = int32(); + auto value_type = utf8(); + auto dict_type = dictionary(index_type, value_type); + auto dict = ArrayFromJSON(value_type, R"(["alpha", null, "gamma"])"); + ASSERT_OK_AND_ASSIGN(auto index_value, MakeScalar(index_type, 0)); + auto dict_value = DictionaryScalar::Make(std::move(index_value), dict); + TestRowSegmenterConstantBatch( + dict_type, [](int64_t key) { return ArgShape::ARRAY; }, + [&](int64_t key) { return dict_value; }, MakeRowSegmenter); +} + +TEST(RowSegmenter, ConstantDictionaryScalarBatch) { + auto index_type = int32(); + auto value_type = utf8(); + auto dict_type = dictionary(index_type, value_type); + auto dict = ArrayFromJSON(value_type, R"(["alpha", null, "gamma"])"); + ASSERT_OK_AND_ASSIGN(auto index_value, MakeScalar(index_type, 0)); + auto dict_value = DictionaryScalar::Make(std::move(index_value), dict); + TestRowSegmenterConstantBatch( + dict_type, [](int64_t key) { return ArgShape::SCALAR; }, + [&](int64_t key) { return dict_value; }, MakeRowSegmenter); +} + +TEST(RowSegmenter, ConstantDictionaryMixedBatch) { + auto index_type = int32(); + auto value_type = utf8(); + auto dict_type = dictionary(index_type, value_type); + auto dict = ArrayFromJSON(value_type, R"(["alpha", null, "gamma"])"); + ASSERT_OK_AND_ASSIGN(auto index_value, MakeScalar(index_type, 0)); + auto dict_value = DictionaryScalar::Make(std::move(index_value), dict); + TestRowSegmenterConstantBatch( + dict_type, + [](int64_t key) { return key % 2 == 0 ? ArgShape::SCALAR : ArgShape::ARRAY; }, + [&](int64_t key) { return dict_value; }, MakeRowSegmenter); +} + +TEST(RowSegmenter, ConstantDictionaryArrayBatchWithAnyKeysSegmenter) { + auto index_type = int32(); + auto value_type = utf8(); + auto dict_type = dictionary(index_type, value_type); + auto dict = ArrayFromJSON(value_type, R"(["alpha", null, "gamma"])"); + ASSERT_OK_AND_ASSIGN(auto index_value, MakeScalar(index_type, 0)); + auto dict_value = DictionaryScalar::Make(std::move(index_value), dict); + TestRowSegmenterConstantBatch( + dict_type, [](int64_t key) { return ArgShape::ARRAY; }, + [&](int64_t key) { return dict_value; }, MakeGenericSegmenter); +} + +TEST(RowSegmenter, ConstantDictionaryScalarBatchWithAnyKeysSegmenter) { + auto index_type = int32(); + auto value_type = utf8(); + auto dict_type = dictionary(index_type, value_type); + auto dict = ArrayFromJSON(value_type, R"(["alpha", null, "gamma"])"); + ASSERT_OK_AND_ASSIGN(auto index_value, MakeScalar(index_type, 0)); + auto dict_value = DictionaryScalar::Make(std::move(index_value), dict); + TestRowSegmenterConstantBatch( + dict_type, [](int64_t key) { return ArgShape::SCALAR; }, + [&](int64_t key) { return dict_value; }, MakeGenericSegmenter); +} + +TEST(RowSegmenter, ConstantDictionaryMixedBatchWithAnyKeysSegmenter) { + auto index_type = int32(); + auto value_type = utf8(); + auto dict_type = dictionary(index_type, value_type); + auto dict = ArrayFromJSON(value_type, R"(["alpha", null, "gamma"])"); + ASSERT_OK_AND_ASSIGN(auto index_value, MakeScalar(index_type, 0)); + auto dict_value = DictionaryScalar::Make(std::move(index_value), dict); + TestRowSegmenterConstantBatch( + dict_type, + [](int64_t key) { return key % 2 == 0 ? 
ArgShape::SCALAR : ArgShape::ARRAY; }, + [&](int64_t key) { return dict_value; }, MakeGenericSegmenter); } TEST(RowSegmenter, RowConstantBatch) { constexpr size_t n = 3; std::vector types = {int32(), int32(), int32()}; auto full_batch = ExecBatchFromJSON(types, "[[1, 1, 1], [2, 2, 2], [3, 3, 3]]"); - std::vector expected_segments_for_size_0 = {{0, 3, true, true}, - {3, 0, true, true}}; + std::vector expected_segments_for_size_0 = {{0, 3, true, true}}; std::vector expected_segments = { - {0, 1, false, true}, {1, 1, false, false}, {2, 1, true, false}, {3, 0, true, true}}; + {0, 1, false, true}, {1, 1, false, false}, {2, 1, true, false}}; auto test_by_size = [&](size_t size) -> Status { SCOPED_TRACE("constant-batch with " + ToChars(size) + " key(s)"); std::vector values(full_batch.values.begin(), diff --git a/cpp/src/arrow/acero/hash_join.cc b/cpp/src/arrow/acero/hash_join.cc index 5aa70a23f7c9e..ddcd2a0995701 100644 --- a/cpp/src/arrow/acero/hash_join.cc +++ b/cpp/src/arrow/acero/hash_join.cc @@ -27,8 +27,8 @@ #include "arrow/acero/hash_join_dict.h" #include "arrow/acero/task_util.h" -#include "arrow/compute/kernels/row_encoder_internal.h" #include "arrow/compute/row/encode_internal.h" +#include "arrow/compute/row/row_encoder_internal.h" #include "arrow/util/tracing_internal.h" namespace arrow { diff --git a/cpp/src/arrow/acero/hash_join_benchmark.cc b/cpp/src/arrow/acero/hash_join_benchmark.cc index 1f8e02e9f0fcf..e3e37e249e6a3 100644 --- a/cpp/src/arrow/acero/hash_join_benchmark.cc +++ b/cpp/src/arrow/acero/hash_join_benchmark.cc @@ -23,7 +23,7 @@ #include "arrow/acero/test_util_internal.h" #include "arrow/acero/util.h" #include "arrow/api.h" -#include "arrow/compute/kernels/row_encoder_internal.h" +#include "arrow/compute/row/row_encoder_internal.h" #include "arrow/testing/random.h" #include "arrow/util/thread_pool.h" @@ -104,7 +104,7 @@ class JoinBenchmark { key_cmp.push_back(JoinKeyCmp::EQ); } - for (size_t i = 0; i < settings.build_payload_types.size(); i++) { + for (size_t i = 0; i < settings.probe_payload_types.size(); i++) { std::string name = "lp" + std::to_string(i); DCHECK_OK(l_schema_builder.AddField(field(name, settings.probe_payload_types[i]))); } @@ -279,7 +279,7 @@ static void BM_HashJoinBasic_MatchesPerRow(benchmark::State& st) { settings.cardinality = 1.0 / static_cast(st.range(0)); settings.num_build_batches = static_cast(st.range(1)); - settings.num_probe_batches = settings.num_probe_batches; + settings.num_probe_batches = settings.num_build_batches; HashJoinBasicBenchmarkImpl(st, settings); } @@ -291,7 +291,7 @@ static void BM_HashJoinBasic_PayloadSize(benchmark::State& st) { settings.cardinality = 1.0 / static_cast(st.range(1)); settings.num_build_batches = static_cast(st.range(2)); - settings.num_probe_batches = settings.num_probe_batches; + settings.num_probe_batches = settings.num_build_batches; HashJoinBasicBenchmarkImpl(st, settings); } diff --git a/cpp/src/arrow/acero/hash_join_dict.cc b/cpp/src/arrow/acero/hash_join_dict.cc index 3aef08e6e9ccf..8db9dddb2c3a0 100644 --- a/cpp/src/arrow/acero/hash_join_dict.cc +++ b/cpp/src/arrow/acero/hash_join_dict.cc @@ -225,21 +225,20 @@ Status HashJoinDictBuild::Init(ExecContext* ctx, std::shared_ptr dictiona return Status::OK(); } - dictionary_ = dictionary; + dictionary_ = std::move(dictionary); // Initialize encoder RowEncoder encoder; - std::vector encoder_types; - encoder_types.emplace_back(value_type_); + std::vector encoder_types{value_type_}; encoder.Init(encoder_types, ctx); // Encode all dictionary values - int64_t 
length = dictionary->data()->length; + int64_t length = dictionary_->data()->length; if (length >= std::numeric_limits::max()) { return Status::Invalid( "Dictionary length in hash join must fit into signed 32-bit integer."); } - RETURN_NOT_OK(encoder.EncodeAndAppend(ExecSpan({*dictionary->data()}, length))); + RETURN_NOT_OK(encoder.EncodeAndAppend(ExecSpan({*dictionary_->data()}, length))); std::vector entries_to_take; diff --git a/cpp/src/arrow/acero/hash_join_dict.h b/cpp/src/arrow/acero/hash_join_dict.h index c7d8d785d079e..02454a7146278 100644 --- a/cpp/src/arrow/acero/hash_join_dict.h +++ b/cpp/src/arrow/acero/hash_join_dict.h @@ -22,7 +22,7 @@ #include "arrow/acero/schema_util.h" #include "arrow/compute/exec.h" -#include "arrow/compute/kernels/row_encoder_internal.h" +#include "arrow/compute/row/row_encoder_internal.h" #include "arrow/result.h" #include "arrow/status.h" #include "arrow/type.h" diff --git a/cpp/src/arrow/acero/hash_join_node.cc b/cpp/src/arrow/acero/hash_join_node.cc index 67f902e64be93..80dd163ced740 100644 --- a/cpp/src/arrow/acero/hash_join_node.cc +++ b/cpp/src/arrow/acero/hash_join_node.cc @@ -61,30 +61,30 @@ Result> HashJoinSchema::ComputePayload( const std::vector& filter, const std::vector& keys) { // payload = (output + filter) - keys, with no duplicates std::unordered_set payload_fields; - for (auto ref : output) { + for (const auto& ref : output) { ARROW_ASSIGN_OR_RAISE(auto match, ref.FindOne(schema)); payload_fields.insert(match[0]); } - for (auto ref : filter) { + for (const auto& ref : filter) { ARROW_ASSIGN_OR_RAISE(auto match, ref.FindOne(schema)); payload_fields.insert(match[0]); } - for (auto ref : keys) { + for (const auto& ref : keys) { ARROW_ASSIGN_OR_RAISE(auto match, ref.FindOne(schema)); payload_fields.erase(match[0]); } std::vector payload_refs; - for (auto ref : output) { + for (const auto& ref : output) { ARROW_ASSIGN_OR_RAISE(auto match, ref.FindOne(schema)); if (payload_fields.find(match[0]) != payload_fields.end()) { payload_refs.push_back(ref); payload_fields.erase(match[0]); } } - for (auto ref : filter) { + for (const auto& ref : filter) { ARROW_ASSIGN_OR_RAISE(auto match, ref.FindOne(schema)); if (payload_fields.find(match[0]) != payload_fields.end()) { payload_refs.push_back(ref); @@ -198,7 +198,7 @@ Status HashJoinSchema::ValidateSchemas(JoinType join_type, const Schema& left_sc return Status::Invalid("Different number of key fields on left (", left_keys.size(), ") and right (", right_keys.size(), ") side of the join"); } - if (left_keys.size() < 1) { + if (left_keys.empty()) { return Status::Invalid("Join key cannot be empty"); } for (size_t i = 0; i < left_keys.size() + right_keys.size(); ++i) { @@ -432,7 +432,7 @@ Status HashJoinSchema::CollectFilterColumns(std::vector& left_filter, indices[0] -= left_schema.num_fields(); FieldPath corrected_path(std::move(indices)); if (right_seen_paths.find(*path) == right_seen_paths.end()) { - right_filter.push_back(corrected_path); + right_filter.emplace_back(corrected_path); right_seen_paths.emplace(std::move(corrected_path)); } } else if (left_seen_paths.find(*path) == left_seen_paths.end()) { @@ -698,7 +698,7 @@ class HashJoinNode : public ExecNode, public TracedNode { std::shared_ptr output_schema, std::unique_ptr schema_mgr, Expression filter, std::unique_ptr impl) - : ExecNode(plan, inputs, {"left", "right"}, + : ExecNode(plan, std::move(inputs), {"left", "right"}, /*output_schema=*/std::move(output_schema)), TracedNode(this), join_type_(join_options.join_type), diff --git 
a/cpp/src/arrow/acero/hash_join_node.h b/cpp/src/arrow/acero/hash_join_node.h index ad60019ceabc4..19745b8675cf0 100644 --- a/cpp/src/arrow/acero/hash_join_node.h +++ b/cpp/src/arrow/acero/hash_join_node.h @@ -65,9 +65,9 @@ class ARROW_ACERO_EXPORT HashJoinSchema { std::shared_ptr MakeOutputSchema(const std::string& left_field_name_suffix, const std::string& right_field_name_suffix); - bool LeftPayloadIsEmpty() { return PayloadIsEmpty(0); } + bool LeftPayloadIsEmpty() const { return PayloadIsEmpty(0); } - bool RightPayloadIsEmpty() { return PayloadIsEmpty(1); } + bool RightPayloadIsEmpty() const { return PayloadIsEmpty(1); } static int kMissingField() { return SchemaProjectionMaps::kMissingField; @@ -88,7 +88,7 @@ class ARROW_ACERO_EXPORT HashJoinSchema { const SchemaProjectionMap& right_to_filter, const Expression& filter); - bool PayloadIsEmpty(int side) { + bool PayloadIsEmpty(int side) const { assert(side == 0 || side == 1); return proj_maps[side].num_cols(HashJoinProjection::PAYLOAD) == 0; } diff --git a/cpp/src/arrow/acero/hash_join_node_test.cc b/cpp/src/arrow/acero/hash_join_node_test.cc index f7b442cc3c624..76ad9c7d650eb 100644 --- a/cpp/src/arrow/acero/hash_join_node_test.cc +++ b/cpp/src/arrow/acero/hash_join_node_test.cc @@ -26,10 +26,12 @@ #include "arrow/acero/test_util_internal.h" #include "arrow/acero/util.h" #include "arrow/api.h" -#include "arrow/compute/kernels/row_encoder_internal.h" #include "arrow/compute/kernels/test_util.h" #include "arrow/compute/light_array_internal.h" +#include "arrow/compute/row/row_encoder_internal.h" +#include "arrow/extension/uuid.h" #include "arrow/testing/extension_type.h" +#include "arrow/testing/generator.h" #include "arrow/testing/gtest_util.h" #include "arrow/testing/matchers.h" #include "arrow/testing/random.h" @@ -40,6 +42,10 @@ using testing::UnorderedElementsAreArray; namespace arrow { +using arrow::gen::Constant; +using arrow::random::kSeedMax; +using arrow::random::RandomArrayGenerator; +using compute::and_; using compute::call; using compute::default_exec_context; using compute::ExecBatchBuilder; @@ -3253,5 +3259,192 @@ TEST(HashJoin, ManyJoins) { ASSERT_OK_AND_ASSIGN(std::ignore, DeclarationToTable(std::move(root))); } +namespace { + +void AssertRowCountEq(Declaration source, int64_t expected) { + Declaration count{"aggregate", + {std::move(source)}, + AggregateNodeOptions{/*aggregates=*/{{"count_all", "count(*)"}}}}; + ASSERT_OK_AND_ASSIGN(auto batches, DeclarationToExecBatches(std::move(count))); + ASSERT_EQ(batches.batches.size(), 1); + ASSERT_EQ(batches.batches[0].values.size(), 1); + ASSERT_TRUE(batches.batches[0].values[0].is_scalar()); + ASSERT_EQ(batches.batches[0].values[0].scalar()->type->id(), Type::INT64); + ASSERT_TRUE(batches.batches[0].values[0].scalar_as().is_valid); + ASSERT_EQ(batches.batches[0].values[0].scalar_as().value, expected); +} + +} // namespace + +// GH-43495: Test that both the key and the payload of the right side (the build side) are +// fixed length and larger than 4GB, and the 64-bit offset in the hash table can handle it +// correctly. 
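Before the test itself, a rough standalone check of the sizes involved may help: with the constants used below (fixed_size_binary(128), 4096 rows per right-side batch, a 5 GiB target), the build side holds roughly 42 million rows and 5 GiB of key bytes, so a 32-bit byte offset wraps long before the last row. This is an illustration only, not part of the patch; the constant names mirror the test, but the program is self-contained.

// Standalone sanity check of the sizes used by the >4GB fixed-length test below.
// Illustration only; it just reproduces the arithmetic with the test's constants.
#include <cstdint>
#include <cstdio>

int main() {
  constexpr int64_t k5GB = 5LL * 1024 * 1024 * 1024;
  constexpr int64_t kByteWidth = 128;            // fixed_size_binary(128)
  constexpr int64_t kRowsPerBatchRight = 4096;
  const int64_t num_batches_right = k5GB / (kRowsPerBatchRight * kByteWidth);  // 10240
  const int64_t total_rows = num_batches_right * kRowsPerBatchRight;           // ~41.9M
  const int64_t total_key_bytes = total_rows * kByteWidth;                     // 5 GiB

  // A 32-bit byte offset (row_id * byte_width) wraps before the last row, which is
  // exactly what a 64-bit row-table offset type avoids.
  const int64_t last_row = total_rows - 1;
  const uint32_t offset32 = static_cast<uint32_t>(last_row * kByteWidth);  // wrapped
  const int64_t offset64 = last_row * kByteWidth;                          // correct

  std::printf("batches=%lld rows=%lld key bytes=%lld\n", (long long)num_batches_right,
              (long long)total_rows, (long long)total_key_bytes);
  std::printf("offset of last row: 32-bit=%u vs 64-bit=%lld\n", offset32,
              (long long)offset64);
  return 0;
}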
+TEST(HashJoin, LARGE_MEMORY_TEST(BuildSideOver4GBFixedLength)) { + constexpr int64_t k5GB = 5ll * 1024 * 1024 * 1024; + constexpr int fixed_length = 128; + const auto type = fixed_size_binary(fixed_length); + constexpr uint8_t byte_no_match_min = static_cast('A'); + constexpr uint8_t byte_no_match_max = static_cast('y'); + constexpr uint8_t byte_match = static_cast('z'); + const auto value_match = + std::make_shared(std::string(fixed_length, byte_match)); + constexpr int16_t num_rows_per_batch_left = 128; + constexpr int16_t num_rows_per_batch_right = 4096; + const int64_t num_batches_left = 8; + const int64_t num_batches_right = + k5GB / (num_rows_per_batch_right * type->byte_width()); + + // Left side composed of num_batches_left identical batches of num_rows_per_batch_left + // rows of value_match-es. + BatchesWithSchema batches_left; + { + // A column with num_rows_per_batch_left value_match-es. + ASSERT_OK_AND_ASSIGN(auto column, + Constant(value_match)->Generate(num_rows_per_batch_left)); + + // Use the column as both the key and the payload. + ExecBatch batch({column, column}, num_rows_per_batch_left); + batches_left = + BatchesWithSchema{std::vector(num_batches_left, std::move(batch)), + schema({field("l_key", type), field("l_payload", type)})}; + } + + // Right side composed of num_batches_right identical batches of + // num_rows_per_batch_right rows containing only 1 value_match. + BatchesWithSchema batches_right; + { + // A column with (num_rows_per_batch_right - 1) non-value_match-es (possibly null) and + // 1 value_match. + auto non_matches = RandomArrayGenerator(kSeedMax).FixedSizeBinary( + num_rows_per_batch_right - 1, fixed_length, + /*null_probability =*/0.01, /*min_byte=*/byte_no_match_min, + /*max_byte=*/byte_no_match_max); + ASSERT_OK_AND_ASSIGN(auto match, Constant(value_match)->Generate(1)); + ASSERT_OK_AND_ASSIGN(auto column, Concatenate({non_matches, match})); + + // Use the column as both the key and the payload. + ExecBatch batch({column, column}, num_rows_per_batch_right); + batches_right = + BatchesWithSchema{std::vector(num_batches_right, std::move(batch)), + schema({field("r_key", type), field("r_payload", type)})}; + } + + Declaration left{"exec_batch_source", + ExecBatchSourceNodeOptions(std::move(batches_left.schema), + std::move(batches_left.batches))}; + + Declaration right{"exec_batch_source", + ExecBatchSourceNodeOptions(std::move(batches_right.schema), + std::move(batches_right.batches))}; + + HashJoinNodeOptions join_opts(JoinType::INNER, /*left_keys=*/{"l_key"}, + /*right_keys=*/{"r_key"}); + Declaration join{"hashjoin", {std::move(left), std::move(right)}, join_opts}; + + ASSERT_OK_AND_ASSIGN(auto batches_result, DeclarationToExecBatches(std::move(join))); + Declaration result{"exec_batch_source", + ExecBatchSourceNodeOptions(std::move(batches_result.schema), + std::move(batches_result.batches))}; + + // The row count of hash join should be (number of value_match-es in left side) * + // (number of value_match-es in right side). + AssertRowCountEq(result, + num_batches_left * num_rows_per_batch_left * num_batches_right); + + // All rows should be value_match-es. 
+ auto predicate = and_({equal(field_ref("l_key"), literal(value_match)), + equal(field_ref("l_payload"), literal(value_match)), + equal(field_ref("r_key"), literal(value_match)), + equal(field_ref("r_payload"), literal(value_match))}); + Declaration filter{"filter", {result}, FilterNodeOptions{std::move(predicate)}}; + AssertRowCountEq(std::move(filter), + num_batches_left * num_rows_per_batch_left * num_batches_right); +} + +// GH-43495: Test that both the key and the payload of the right side (the build side) are +// var length and larger than 4GB, and the 64-bit offset in the hash table can handle it +// correctly. +TEST(HashJoin, LARGE_MEMORY_TEST(BuildSideOver4GBVarLength)) { + constexpr int64_t k5GB = 5ll * 1024 * 1024 * 1024; + const auto type = utf8(); + constexpr int value_no_match_length_min = 128; + constexpr int value_no_match_length_max = 129; + constexpr int value_match_length = 130; + const auto value_match = + std::make_shared(std::string(value_match_length, 'X')); + constexpr int16_t num_rows_per_batch_left = 128; + constexpr int16_t num_rows_per_batch_right = 4096; + const int64_t num_batches_left = 8; + const int64_t num_batches_right = + k5GB / (num_rows_per_batch_right * value_no_match_length_min); + + // Left side composed of num_batches_left identical batches of num_rows_per_batch_left + // rows of value_match-es. + BatchesWithSchema batches_left; + { + // A column with num_rows_per_batch_left value_match-es. + ASSERT_OK_AND_ASSIGN(auto column, + Constant(value_match)->Generate(num_rows_per_batch_left)); + + // Use the column as both the key and the payload. + ExecBatch batch({column, column}, num_rows_per_batch_left); + batches_left = + BatchesWithSchema{std::vector(num_batches_left, std::move(batch)), + schema({field("l_key", type), field("l_payload", type)})}; + } + + // Right side composed of num_batches_right identical batches of + // num_rows_per_batch_right rows containing only 1 value_match. + BatchesWithSchema batches_right; + { + // A column with (num_rows_per_batch_right - 1) non-value_match-es (possibly null) and + // 1 value_match. + auto non_matches = + RandomArrayGenerator(kSeedMax).String(num_rows_per_batch_right - 1, + /*min_length=*/value_no_match_length_min, + /*max_length=*/value_no_match_length_max, + /*null_probability =*/0.01); + ASSERT_OK_AND_ASSIGN(auto match, Constant(value_match)->Generate(1)); + ASSERT_OK_AND_ASSIGN(auto column, Concatenate({non_matches, match})); + + // Use the column as both the key and the payload. 
+ ExecBatch batch({column, column}, num_rows_per_batch_right); + batches_right = + BatchesWithSchema{std::vector(num_batches_right, std::move(batch)), + schema({field("r_key", type), field("r_payload", type)})}; + } + + Declaration left{"exec_batch_source", + ExecBatchSourceNodeOptions(std::move(batches_left.schema), + std::move(batches_left.batches))}; + + Declaration right{"exec_batch_source", + ExecBatchSourceNodeOptions(std::move(batches_right.schema), + std::move(batches_right.batches))}; + + HashJoinNodeOptions join_opts(JoinType::INNER, /*left_keys=*/{"l_key"}, + /*right_keys=*/{"r_key"}); + Declaration join{"hashjoin", {std::move(left), std::move(right)}, join_opts}; + + ASSERT_OK_AND_ASSIGN(auto batches_result, DeclarationToExecBatches(std::move(join))); + Declaration result{"exec_batch_source", + ExecBatchSourceNodeOptions(std::move(batches_result.schema), + std::move(batches_result.batches))}; + + // The row count of hash join should be (number of value_match-es in left side) * + // (number of value_match-es in right side). + AssertRowCountEq(result, + num_batches_left * num_rows_per_batch_left * num_batches_right); + + // All rows should be value_match-es. + auto predicate = and_({equal(field_ref("l_key"), literal(value_match)), + equal(field_ref("l_payload"), literal(value_match)), + equal(field_ref("r_key"), literal(value_match)), + equal(field_ref("r_payload"), literal(value_match))}); + Declaration filter{"filter", {result}, FilterNodeOptions{std::move(predicate)}}; + AssertRowCountEq(std::move(filter), + num_batches_left * num_rows_per_batch_left * num_batches_right); +} + } // namespace acero } // namespace arrow diff --git a/cpp/src/arrow/acero/options_internal.h b/cpp/src/arrow/acero/options_internal.h index d4bf79a7cd008..fd3ea78116572 100644 --- a/cpp/src/arrow/acero/options_internal.h +++ b/cpp/src/arrow/acero/options_internal.h @@ -18,8 +18,8 @@ #pragma once #ifndef NDEBUG -#include -#include +# include +# include #endif namespace arrow { diff --git a/cpp/src/arrow/acero/scalar_aggregate_node.cc b/cpp/src/arrow/acero/scalar_aggregate_node.cc index c7805f4d24eb2..b34f7511cc12b 100644 --- a/cpp/src/arrow/acero/scalar_aggregate_node.cc +++ b/cpp/src/arrow/acero/scalar_aggregate_node.cc @@ -234,7 +234,8 @@ Status ScalarAggregateNode::InputReceived(ExecNode* input, ExecBatch batch) { // (1) The segment is starting of a new segment group and points to // the beginning of the batch, then it means no data in the batch belongs // to the current segment group. We can output and reset kernel states. - if (!segment.extends && segment.offset == 0) RETURN_NOT_OK(OutputResult(false)); + if (!segment.extends && segment.offset == 0) + RETURN_NOT_OK(OutputResult(/*is_last=*/false)); // We add segment to the current segment group aggregation auto exec_batch = full_batch.Slice(segment.offset, segment.length); @@ -244,7 +245,7 @@ Status ScalarAggregateNode::InputReceived(ExecNode* input, ExecBatch batch) { // If the segment closes the current segment group, we can output segment group // aggregation. 
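The two OutputResult call sites in this handler implement a simple rule: flush the running aggregate when a batch's first segment does not extend the current group, and flush again whenever a segment is closed. A minimal plain-C++ model of that rule, not the Arrow node itself; the Segment fields mirror the {offset, length, is_open, extends} quadruples used in the segmenter tests, and the running sum is a stand-in for the kernel state.

#include <cstdint>
#include <cstdio>
#include <vector>

struct Segment {
  int64_t offset;
  int64_t length;
  bool is_open;   // true if the segment may continue into the next batch
  bool extends;   // true if it continues the previous segment group
};

int main() {
  // One batch of values, pre-split into segments as a row segmenter would report them.
  std::vector<int> batch = {1, 1, 2, 2, 2, 3};
  std::vector<Segment> segments = {{0, 2, false, true}, {2, 3, false, false},
                                   {5, 1, true, false}};
  int64_t sum = 0;  // "kernel state" for a sum aggregate
  for (const Segment& seg : segments) {
    if (!seg.extends && seg.offset == 0) {  // nothing in this batch belongs to the group
      std::printf("group result: %lld\n", (long long)sum);
      sum = 0;
    }
    for (int64_t i = seg.offset; i < seg.offset + seg.length; ++i) sum += batch[i];
    if (!seg.is_open) {  // the group is complete: output and reset
      std::printf("group result: %lld\n", (long long)sum);
      sum = 0;
    }
  }
  // An open trailing segment carries its partial sum into the next batch.
  return 0;
}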
- if (!segment.is_open) RETURN_NOT_OK(OutputResult(false)); + if (!segment.is_open) RETURN_NOT_OK(OutputResult(/*is_last=*/false)); return Status::OK(); }; diff --git a/cpp/src/arrow/acero/sorted_merge_node.cc b/cpp/src/arrow/acero/sorted_merge_node.cc index a71ac79efcc46..2845383cee982 100644 --- a/cpp/src/arrow/acero/sorted_merge_node.cc +++ b/cpp/src/arrow/acero/sorted_merge_node.cc @@ -28,7 +28,7 @@ #include "arrow/acero/options.h" #include "arrow/acero/query_context.h" #include "arrow/acero/time_series_util.h" -#include "arrow/acero/unmaterialized_table.h" +#include "arrow/acero/unmaterialized_table_internal.h" #include "arrow/acero/util.h" #include "arrow/array/builder_base.h" #include "arrow/result.h" diff --git a/cpp/src/arrow/acero/swiss_join.cc b/cpp/src/arrow/acero/swiss_join.cc index 732deb72861d6..6c783110af571 100644 --- a/cpp/src/arrow/acero/swiss_join.cc +++ b/cpp/src/arrow/acero/swiss_join.cc @@ -24,10 +24,10 @@ #include "arrow/acero/swiss_join_internal.h" #include "arrow/acero/util.h" #include "arrow/array/util.h" // MakeArrayFromScalar -#include "arrow/compute/kernels/row_encoder_internal.h" #include "arrow/compute/key_hash_internal.h" #include "arrow/compute/row/compare_internal.h" #include "arrow/compute/row/encode_internal.h" +#include "arrow/compute/row/row_encoder_internal.h" #include "arrow/util/bit_util.h" #include "arrow/util/bitmap_ops.h" #include "arrow/util/tracing_internal.h" @@ -122,7 +122,7 @@ void RowArrayAccessor::Visit(const RowTableImpl& rows, int column_id, int num_ro if (!is_fixed_length_column) { int varbinary_column_id = VarbinaryColumnId(rows.metadata(), column_id); const uint8_t* row_ptr_base = rows.data(2); - const uint32_t* row_offsets = rows.offsets(); + const RowTableImpl::offset_type* row_offsets = rows.offsets(); uint32_t field_offset_within_row, field_length; if (varbinary_column_id == 0) { @@ -173,7 +173,7 @@ void RowArrayAccessor::Visit(const RowTableImpl& rows, int column_id, int num_ro // Case 4: This is a fixed length column in a varying length row // const uint8_t* row_ptr_base = rows.data(2) + field_offset_within_row; - const uint32_t* row_offsets = rows.offsets(); + const RowTableImpl::offset_type* row_offsets = rows.offsets(); for (int i = 0; i < num_rows; ++i) { uint32_t row_id = row_ids[i]; const uint8_t* row_ptr = row_ptr_base + row_offsets[row_id]; @@ -473,17 +473,10 @@ Status RowArrayMerge::PrepareForMerge(RowArray* target, (*first_target_row_id)[sources.size()] = num_rows; } - if (num_bytes > std::numeric_limits::max()) { - return Status::Invalid( - "There are more than 2^32 bytes of key data. Acero cannot " - "process a join of this magnitude"); - } - // Allocate target memory // target->rows_.Clean(); - RETURN_NOT_OK(target->rows_.AppendEmpty(static_cast(num_rows), - static_cast(num_bytes))); + RETURN_NOT_OK(target->rows_.AppendEmpty(static_cast(num_rows), num_bytes)); // In case of varying length rows, // initialize the first row offset for each range of rows corresponding to a @@ -565,15 +558,15 @@ void RowArrayMerge::CopyVaryingLength(RowTableImpl* target, const RowTableImpl& int64_t first_target_row_offset, const int64_t* source_rows_permutation) { int64_t num_source_rows = source.length(); - uint32_t* target_offsets = target->mutable_offsets(); - const uint32_t* source_offsets = source.offsets(); + RowTableImpl::offset_type* target_offsets = target->mutable_offsets(); + const RowTableImpl::offset_type* source_offsets = source.offsets(); // Permutation of source rows is optional. 
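The merge loop below now carries 64-bit row offsets end to end while still requiring each individual row length to fit in 32 bits. A minimal model of that invariant follows; AppendOffsets is a made-up name for illustration, and the real code writes into a preallocated row table and deliberately skips the closing offset to allow concurrent copies.

#include <cassert>
#include <cstdint>
#include <limits>
#include <vector>

using offset_type = int64_t;  // mirrors the 64-bit row-table offset type

void AppendOffsets(const std::vector<offset_type>& source_offsets,  // size = n_rows + 1
                   offset_type first_target_row_offset,
                   std::vector<offset_type>* target_offsets) {
  const size_t num_rows = source_offsets.size() - 1;
  offset_type target_row_offset = first_target_row_offset;
  for (size_t i = 0; i < num_rows; ++i) {
    const int64_t length = source_offsets[i + 1] - source_offsets[i];
    // A single row must still fit in 32 bits, even though the running offset is 64-bit.
    assert(length <= std::numeric_limits<uint32_t>::max());
    target_offsets->push_back(target_row_offset);
    target_row_offset += length;
  }
  target_offsets->push_back(target_row_offset);  // closing offset of the appended range
}

int main() {
  std::vector<offset_type> merged;
  // The first target offset may already be past the 4GB mark.
  AppendOffsets({0, 128, 300}, /*first_target_row_offset=*/5'000'000'000LL, &merged);
  return 0;
}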
// if (!source_rows_permutation) { int64_t target_row_offset = first_target_row_offset; for (int64_t i = 0; i < num_source_rows; ++i) { - target_offsets[first_target_row_id + i] = static_cast(target_row_offset); + target_offsets[first_target_row_id + i] = target_row_offset; target_row_offset += source_offsets[i + 1] - source_offsets[i]; } // We purposefully skip outputting of N+1 offset, to allow concurrent @@ -593,7 +586,10 @@ void RowArrayMerge::CopyVaryingLength(RowTableImpl* target, const RowTableImpl& int64_t source_row_id = source_rows_permutation[i]; const uint64_t* source_row_ptr = reinterpret_cast( source.data(2) + source_offsets[source_row_id]); - uint32_t length = source_offsets[source_row_id + 1] - source_offsets[source_row_id]; + int64_t length = source_offsets[source_row_id + 1] - source_offsets[source_row_id]; + // Though the row offset is 64-bit, the length of a single row must be 32-bit as + // required by current row table implementation. + DCHECK_LE(length, std::numeric_limits::max()); // Rows should be 64-bit aligned. // In that case we can copy them using a sequence of 64-bit read/writes. @@ -604,7 +600,7 @@ void RowArrayMerge::CopyVaryingLength(RowTableImpl* target, const RowTableImpl& *target_row_ptr++ = *source_row_ptr++; } - target_offsets[first_target_row_id + i] = static_cast(target_row_offset); + target_offsets[first_target_row_id + i] = target_row_offset; target_row_offset += length; } } @@ -1671,7 +1667,7 @@ Result> JoinResultMaterialize::FlushBuildColumn( const std::shared_ptr& data_type, const RowArray* row_array, int column_id, uint32_t* row_ids) { ResizableArrayData output; - output.Init(data_type, pool_, bit_util::Log2(num_rows_)); + RETURN_NOT_OK(output.Init(data_type, pool_, bit_util::Log2(num_rows_))); for (size_t i = 0; i <= null_ranges_.size(); ++i) { int row_id_begin = @@ -2251,8 +2247,9 @@ Result JoinResidualFilter::MaterializeFilterInput( build_schemas_->map(HashJoinProjection::FILTER, HashJoinProjection::PAYLOAD); for (int i = 0; i < num_build_cols; ++i) { ResizableArrayData column_data; - column_data.Init(build_schemas_->data_type(HashJoinProjection::FILTER, i), pool_, - bit_util::Log2(num_batch_rows)); + RETURN_NOT_OK( + column_data.Init(build_schemas_->data_type(HashJoinProjection::FILTER, i), + pool_, bit_util::Log2(num_batch_rows))); if (auto idx = to_key.get(i); idx != SchemaProjectionMap::kMissingField) { RETURN_NOT_OK(build_keys_->DecodeSelected(&column_data, idx, num_batch_rows, key_ids_maybe_null, pool_)); diff --git a/cpp/src/arrow/acero/swiss_join_avx2.cc b/cpp/src/arrow/acero/swiss_join_avx2.cc index 0888dd8938455..1076073523448 100644 --- a/cpp/src/arrow/acero/swiss_join_avx2.cc +++ b/cpp/src/arrow/acero/swiss_join_avx2.cc @@ -15,14 +15,16 @@ // specific language governing permissions and limitations // under the License. -#include - #include "arrow/acero/swiss_join_internal.h" #include "arrow/util/bit_util.h" +#include "arrow/util/simd.h" namespace arrow { namespace acero { +// TODO(GH-43693): The functions in this file are not wired anywhere. We may consider +// actually utilizing them or removing them. 
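The rewritten Visit_avx2 below repeatedly uses one pattern: split 8 32-bit row ids into two 128-bit halves and gather 4 64-bit row offsets per half with _mm256_i32gather_epi64. A self-contained sketch of just that pattern (illustration only, not the Arrow kernel; compile with -mavx2; the array contents are made up):

#include <immintrin.h>
#include <cstdint>
#include <cstdio>

int main() {
  const int64_t row_offsets[] = {0, 100, 250, 4'500'000'000LL, 4'500'000'128LL,
                                 9'000'000'000LL, 9'000'000'064LL, 9'000'000'192LL};
  const uint32_t row_ids[8] = {7, 0, 3, 5, 1, 6, 2, 4};

  // Load 8 32-bit row ids.
  __m256i row_id = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(row_ids));
  // Gather the lower/higher 4 64-bit row offsets based on the lower/higher 4 row ids.
  __m256i offset_lo =
      _mm256_i32gather_epi64(reinterpret_cast<const long long*>(row_offsets),
                             _mm256_castsi256_si128(row_id), sizeof(int64_t));
  __m256i offset_hi =
      _mm256_i32gather_epi64(reinterpret_cast<const long long*>(row_offsets),
                             _mm256_extracti128_si256(row_id, 1), sizeof(int64_t));

  int64_t out[8];
  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), offset_lo);
  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out + 4), offset_hi);
  for (int i = 0; i < 8; ++i) {
    std::printf("row %u -> offset %lld (scalar %lld)\n", row_ids[i], (long long)out[i],
                (long long)row_offsets[row_ids[i]]);
  }
  return 0;
}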
+ template int RowArrayAccessor::Visit_avx2(const RowTableImpl& rows, int column_id, int num_rows, const uint32_t* row_ids, @@ -45,48 +47,78 @@ int RowArrayAccessor::Visit_avx2(const RowTableImpl& rows, int column_id, int nu if (!is_fixed_length_column) { int varbinary_column_id = VarbinaryColumnId(rows.metadata(), column_id); const uint8_t* row_ptr_base = rows.data(2); - const uint32_t* row_offsets = rows.offsets(); + const RowTableImpl::offset_type* row_offsets = rows.offsets(); + static_assert( + sizeof(RowTableImpl::offset_type) == sizeof(int64_t), + "RowArrayAccessor::Visit_avx2 only supports 64-bit RowTableImpl::offset_type"); if (varbinary_column_id == 0) { // Case 1: This is the first varbinary column // __m256i field_offset_within_row = _mm256_set1_epi32(rows.metadata().fixed_length); __m256i varbinary_end_array_offset = - _mm256_set1_epi32(rows.metadata().varbinary_end_array_offset); + _mm256_set1_epi64x(rows.metadata().varbinary_end_array_offset); for (int i = 0; i < num_rows / unroll; ++i) { + // Load 8 32-bit row ids. __m256i row_id = _mm256_loadu_si256(reinterpret_cast(row_ids) + i); - __m256i row_offset = _mm256_i32gather_epi32( - reinterpret_cast(row_offsets), row_id, sizeof(uint32_t)); + // Gather the lower/higher 4 64-bit row offsets based on the lower/higher 4 32-bit + // row ids. + __m256i row_offset_lo = + _mm256_i32gather_epi64(row_offsets, _mm256_castsi256_si128(row_id), + sizeof(RowTableImpl::offset_type)); + __m256i row_offset_hi = + _mm256_i32gather_epi64(row_offsets, _mm256_extracti128_si256(row_id, 1), + sizeof(RowTableImpl::offset_type)); + // Gather the lower/higher 4 32-bit field lengths based on the lower/higher 4 + // 64-bit row offsets. + __m128i field_length_lo = _mm256_i64gather_epi32( + reinterpret_cast(row_ptr_base), + _mm256_add_epi64(row_offset_lo, varbinary_end_array_offset), 1); + __m128i field_length_hi = _mm256_i64gather_epi32( + reinterpret_cast(row_ptr_base), + _mm256_add_epi64(row_offset_hi, varbinary_end_array_offset), 1); + // The final 8 32-bit field lengths, subtracting the field offset within row. __m256i field_length = _mm256_sub_epi32( - _mm256_i32gather_epi32( - reinterpret_cast(row_ptr_base), - _mm256_add_epi32(row_offset, varbinary_end_array_offset), 1), - field_offset_within_row); + _mm256_set_m128i(field_length_hi, field_length_lo), field_offset_within_row); process_8_values_fn(i * unroll, row_ptr_base, - _mm256_add_epi32(row_offset, field_offset_within_row), + _mm256_add_epi64(row_offset_lo, field_offset_within_row), + _mm256_add_epi64(row_offset_hi, field_offset_within_row), field_length); } } else { // Case 2: This is second or later varbinary column // __m256i varbinary_end_array_offset = - _mm256_set1_epi32(rows.metadata().varbinary_end_array_offset + - sizeof(uint32_t) * (varbinary_column_id - 1)); + _mm256_set1_epi64x(rows.metadata().varbinary_end_array_offset + + sizeof(uint32_t) * (varbinary_column_id - 1)); auto row_ptr_base_i64 = reinterpret_cast(row_ptr_base); for (int i = 0; i < num_rows / unroll; ++i) { + // Load 8 32-bit row ids. 
__m256i row_id = _mm256_loadu_si256(reinterpret_cast(row_ids) + i); - __m256i row_offset = _mm256_i32gather_epi32( - reinterpret_cast(row_offsets), row_id, sizeof(uint32_t)); - __m256i end_array_offset = - _mm256_add_epi32(row_offset, varbinary_end_array_offset); - - __m256i field_offset_within_row_A = _mm256_i32gather_epi64( - row_ptr_base_i64, _mm256_castsi256_si128(end_array_offset), 1); - __m256i field_offset_within_row_B = _mm256_i32gather_epi64( - row_ptr_base_i64, _mm256_extracti128_si256(end_array_offset, 1), 1); + // Gather the lower/higher 4 64-bit row offsets based on the lower/higher 4 32-bit + // row ids. + __m256i row_offset_lo = + _mm256_i32gather_epi64(row_offsets, _mm256_castsi256_si128(row_id), + sizeof(RowTableImpl::offset_type)); + // Gather the lower/higher 4 32-bit field lengths based on the lower/higher 4 + // 64-bit row offsets. + __m256i row_offset_hi = + _mm256_i32gather_epi64(row_offsets, _mm256_extracti128_si256(row_id, 1), + sizeof(RowTableImpl::offset_type)); + // Prepare the lower/higher 4 64-bit end array offsets based on the lower/higher 4 + // 64-bit row offsets. + __m256i end_array_offset_lo = + _mm256_add_epi64(row_offset_lo, varbinary_end_array_offset); + __m256i end_array_offset_hi = + _mm256_add_epi64(row_offset_hi, varbinary_end_array_offset); + + __m256i field_offset_within_row_A = + _mm256_i64gather_epi64(row_ptr_base_i64, end_array_offset_lo, 1); + __m256i field_offset_within_row_B = + _mm256_i64gather_epi64(row_ptr_base_i64, end_array_offset_hi, 1); field_offset_within_row_A = _mm256_permutevar8x32_epi32( field_offset_within_row_A, _mm256_setr_epi32(0, 2, 4, 6, 1, 3, 5, 7)); field_offset_within_row_B = _mm256_permutevar8x32_epi32( @@ -110,8 +142,14 @@ int RowArrayAccessor::Visit_avx2(const RowTableImpl& rows, int column_id, int nu 0x4e); // Swapping low and high 128-bits field_length = _mm256_sub_epi32(field_length, field_offset_within_row); + field_offset_within_row_A = + _mm256_add_epi32(field_offset_within_row_A, alignment_padding); + field_offset_within_row_B = + _mm256_add_epi32(field_offset_within_row_B, alignment_padding); + process_8_values_fn(i * unroll, row_ptr_base, - _mm256_add_epi32(row_offset, field_offset_within_row), + _mm256_add_epi64(row_offset_lo, field_offset_within_row_A), + _mm256_add_epi64(row_offset_hi, field_offset_within_row_B), field_length); } } @@ -119,7 +157,7 @@ int RowArrayAccessor::Visit_avx2(const RowTableImpl& rows, int column_id, int nu if (is_fixed_length_column) { __m256i field_offset_within_row = - _mm256_set1_epi32(rows.metadata().encoded_field_offset( + _mm256_set1_epi64x(rows.metadata().encoded_field_offset( rows.metadata().pos_after_encoding(column_id))); __m256i field_length = _mm256_set1_epi32(rows.metadata().column_metadatas[column_id].fixed_length); @@ -130,24 +168,51 @@ int RowArrayAccessor::Visit_avx2(const RowTableImpl& rows, int column_id, int nu // const uint8_t* row_ptr_base = rows.data(1); for (int i = 0; i < num_rows / unroll; ++i) { + // Load 8 32-bit row ids. __m256i row_id = _mm256_loadu_si256(reinterpret_cast(row_ids) + i); - __m256i row_offset = _mm256_mullo_epi32(row_id, field_length); - __m256i field_offset = _mm256_add_epi32(row_offset, field_offset_within_row); - process_8_values_fn(i * unroll, row_ptr_base, field_offset, field_length); + // Widen the 32-bit row ids to 64-bit and store the lower/higher 4 of them into 2 + // 256-bit registers. 
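For fixed-length rows the offset is computed rather than gathered: the row ids are widened to 64-bit lanes and _mm256_mul_epi32 multiplies their low 32 bits by the byte width, yielding full 64-bit products so row_id * byte_width no longer wraps at 4GB. A standalone sketch of that widening multiply (illustration only; compile with -mavx2):

#include <immintrin.h>
#include <cstdint>
#include <cstdio>

int main() {
  const uint32_t row_ids[4] = {0, 1, 40'000'000u, 41'943'039u};  // large ids * 128 > 4GB
  const int32_t byte_width = 128;

  __m128i id32 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(row_ids));
  // Widen the 4 32-bit row ids to 64-bit lanes, then multiply their low 32 bits by the
  // fixed byte width, producing 64-bit byte offsets.
  __m256i id64 = _mm256_cvtepi32_epi64(id32);
  __m256i width = _mm256_set1_epi32(byte_width);
  __m256i offsets = _mm256_mul_epi32(id64, width);

  int64_t out[4];
  _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), offsets);
  for (int i = 0; i < 4; ++i) {
    std::printf("row %u -> offset %lld (scalar %lld)\n", row_ids[i], (long long)out[i],
                (long long)row_ids[i] * byte_width);
  }
  return 0;
}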
+ __m256i row_id_lo = _mm256_cvtepi32_epi64(_mm256_castsi256_si128(row_id)); + __m256i row_id_hi = _mm256_cvtepi32_epi64(_mm256_extracti128_si256(row_id, 1)); + // Calculate the lower/higher 4 64-bit row offsets based on the lower/higher 4 + // 64-bit row ids and the fixed field length. + __m256i row_offset_lo = _mm256_mul_epi32(row_id_lo, field_length); + __m256i row_offset_hi = _mm256_mul_epi32(row_id_hi, field_length); + // Calculate the lower/higher 4 64-bit field offsets based on the lower/higher 4 + // 64-bit row offsets and field offset within row. + __m256i field_offset_lo = + _mm256_add_epi64(row_offset_lo, field_offset_within_row); + __m256i field_offset_hi = + _mm256_add_epi64(row_offset_hi, field_offset_within_row); + process_8_values_fn(i * unroll, row_ptr_base, field_offset_lo, field_offset_hi, + field_length); } } else { // Case 4: This is a fixed length column in varying length row // const uint8_t* row_ptr_base = rows.data(2); - const uint32_t* row_offsets = rows.offsets(); + const RowTableImpl::offset_type* row_offsets = rows.offsets(); for (int i = 0; i < num_rows / unroll; ++i) { + // Load 8 32-bit row ids. __m256i row_id = _mm256_loadu_si256(reinterpret_cast(row_ids) + i); - __m256i row_offset = _mm256_i32gather_epi32( - reinterpret_cast(row_offsets), row_id, sizeof(uint32_t)); - __m256i field_offset = _mm256_add_epi32(row_offset, field_offset_within_row); - process_8_values_fn(i * unroll, row_ptr_base, field_offset, field_length); + // Gather the lower/higher 4 64-bit row offsets based on the lower/higher 4 32-bit + // row ids. + __m256i row_offset_lo = + _mm256_i32gather_epi64(row_offsets, _mm256_castsi256_si128(row_id), + sizeof(RowTableImpl::offset_type)); + __m256i row_offset_hi = + _mm256_i32gather_epi64(row_offsets, _mm256_extracti128_si256(row_id, 1), + sizeof(RowTableImpl::offset_type)); + // Calculate the lower/higher 4 64-bit field offsets based on the lower/higher 4 + // 64-bit row offsets and field offset within row. 
+ __m256i field_offset_lo = + _mm256_add_epi64(row_offset_lo, field_offset_within_row); + __m256i field_offset_hi = + _mm256_add_epi64(row_offset_hi, field_offset_within_row); + process_8_values_fn(i * unroll, row_ptr_base, field_offset_lo, field_offset_hi, + field_length); } } } diff --git a/cpp/src/arrow/acero/swiss_join_internal.h b/cpp/src/arrow/acero/swiss_join_internal.h index dceb74abe4f1b..4d749c1c529ae 100644 --- a/cpp/src/arrow/acero/swiss_join_internal.h +++ b/cpp/src/arrow/acero/swiss_join_internal.h @@ -22,10 +22,10 @@ #include "arrow/acero/partition_util.h" #include "arrow/acero/schema_util.h" #include "arrow/acero/task_util.h" -#include "arrow/compute/kernels/row_encoder_internal.h" #include "arrow/compute/key_map_internal.h" #include "arrow/compute/light_array_internal.h" #include "arrow/compute/row/encode_internal.h" +#include "arrow/compute/row/row_encoder_internal.h" namespace arrow { diff --git a/cpp/src/arrow/acero/tpch_node_test.cc b/cpp/src/arrow/acero/tpch_node_test.cc index 076bcf634a6ba..17fb43452bc58 100644 --- a/cpp/src/arrow/acero/tpch_node_test.cc +++ b/cpp/src/arrow/acero/tpch_node_test.cc @@ -27,8 +27,8 @@ #include "arrow/acero/test_util_internal.h" #include "arrow/acero/tpch_node.h" #include "arrow/acero/util.h" -#include "arrow/compute/kernels/row_encoder_internal.h" #include "arrow/compute/kernels/test_util.h" +#include "arrow/compute/row/row_encoder_internal.h" #include "arrow/testing/gtest_util.h" #include "arrow/testing/matchers.h" #include "arrow/testing/random.h" diff --git a/cpp/src/arrow/acero/unmaterialized_table.h b/cpp/src/arrow/acero/unmaterialized_table_internal.h similarity index 100% rename from cpp/src/arrow/acero/unmaterialized_table.h rename to cpp/src/arrow/acero/unmaterialized_table_internal.h diff --git a/cpp/src/arrow/acero/util.h b/cpp/src/arrow/acero/util.h index 0eb9f4c87e180..ee46e8527422a 100644 --- a/cpp/src/arrow/acero/util.h +++ b/cpp/src/arrow/acero/util.h @@ -65,7 +65,7 @@ class ARROW_ACERO_EXPORT AtomicCounter { // return true if the counter is complete bool Increment() { - DCHECK_NE(count_.load(), total_.load()); + ARROW_DCHECK_NE(count_.load(), total_.load()); int count = count_.fetch_add(1) + 1; if (count != total_.load()) return false; return DoneOnce(); diff --git a/cpp/src/arrow/acero/visibility.h b/cpp/src/arrow/acero/visibility.h index 02382232b69dd..21a697a56eca9 100644 --- a/cpp/src/arrow/acero/visibility.h +++ b/cpp/src/arrow/acero/visibility.h @@ -20,31 +20,31 @@ #pragma once #if defined(_WIN32) || defined(__CYGWIN__) -#if defined(_MSC_VER) -#pragma warning(push) -#pragma warning(disable : 4251) -#else -#pragma GCC diagnostic ignored "-Wattributes" -#endif +# if defined(_MSC_VER) +# pragma warning(push) +# pragma warning(disable : 4251) +# else +# pragma GCC diagnostic ignored "-Wattributes" +# endif -#ifdef ARROW_ACERO_STATIC -#define ARROW_ACERO_EXPORT -#elif defined(ARROW_ACERO_EXPORTING) -#define ARROW_ACERO_EXPORT __declspec(dllexport) -#else -#define ARROW_ACERO_EXPORT __declspec(dllimport) -#endif +# ifdef ARROW_ACERO_STATIC +# define ARROW_ACERO_EXPORT +# elif defined(ARROW_ACERO_EXPORTING) +# define ARROW_ACERO_EXPORT __declspec(dllexport) +# else +# define ARROW_ACERO_EXPORT __declspec(dllimport) +# endif -#define ARROW_ACERO_NO_EXPORT +# define ARROW_ACERO_NO_EXPORT #else // Not Windows -#ifndef ARROW_ACERO_EXPORT -#define ARROW_ACERO_EXPORT __attribute__((visibility("default"))) -#endif -#ifndef ARROW_ACERO_NO_EXPORT -#define ARROW_ACERO_NO_EXPORT __attribute__((visibility("hidden"))) -#endif +# 
ifndef ARROW_ACERO_EXPORT +# define ARROW_ACERO_EXPORT __attribute__((visibility("default"))) +# endif +# ifndef ARROW_ACERO_NO_EXPORT +# define ARROW_ACERO_NO_EXPORT __attribute__((visibility("hidden"))) +# endif #endif // Not-Windows #if defined(_MSC_VER) -#pragma warning(pop) +# pragma warning(pop) #endif diff --git a/cpp/src/arrow/adapters/orc/adapter.cc b/cpp/src/arrow/adapters/orc/adapter.cc index 25759f8471365..d16b6cfd2e97d 100644 --- a/cpp/src/arrow/adapters/orc/adapter.cc +++ b/cpp/src/arrow/adapters/orc/adapter.cc @@ -25,7 +25,7 @@ #include #ifdef ARROW_ORC_NEED_TIME_ZONE_DATABASE_CHECK -#include +# include #endif #include "arrow/adapters/orc/util.h" diff --git a/cpp/src/arrow/array/array_base.h b/cpp/src/arrow/array/array_base.h index 716ae0722069e..e4af67d7e5f0b 100644 --- a/cpp/src/arrow/array/array_base.h +++ b/cpp/src/arrow/array/array_base.h @@ -232,6 +232,14 @@ class ARROW_EXPORT Array { /// \return DeviceAllocationType DeviceAllocationType device_type() const { return data_->device_type(); } + /// \brief Return the statistics of this Array + /// + /// This just delegates to calling statistics on the underlying ArrayData + /// object which backs this Array. + /// + /// \return const ArrayStatistics& + std::shared_ptr statistics() const { return data_->statistics; } + protected: Array() = default; ARROW_DEFAULT_MOVE_AND_ASSIGN(Array); diff --git a/cpp/src/arrow/array/array_binary.cc b/cpp/src/arrow/array/array_binary.cc index d83ba0ca8936d..1266819bdb311 100644 --- a/cpp/src/arrow/array/array_binary.cc +++ b/cpp/src/arrow/array/array_binary.cc @@ -125,12 +125,8 @@ FixedSizeBinaryArray::FixedSizeBinaryArray(const std::shared_ptr& type int64_t length, const std::shared_ptr& data, const std::shared_ptr& null_bitmap, - int64_t null_count, int64_t offset) - : PrimitiveArray(type, length, data, null_bitmap, null_count, offset), - byte_width_(checked_cast(*type).byte_width()) {} - -const uint8_t* FixedSizeBinaryArray::GetValue(int64_t i) const { - return raw_values_ + (i + data_->offset) * byte_width_; + int64_t null_count, int64_t offset) { + SetData(ArrayData::Make(type, length, {null_bitmap, data}, null_count, offset)); } } // namespace arrow diff --git a/cpp/src/arrow/array/array_binary.h b/cpp/src/arrow/array/array_binary.h index 19fdee61243d1..63903eac46d41 100644 --- a/cpp/src/arrow/array/array_binary.h +++ b/cpp/src/arrow/array/array_binary.h @@ -57,8 +57,6 @@ class BaseBinaryArray : public FlatArray { /// Return the pointer to the given elements bytes // XXX should GetValue(int64_t i) return a string_view? 
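The accessor changes in the hunk that follows move slice-offset accounting out of every call: the raw offsets pointer is advanced once when the array data is set, so value_offset(i) and value_length(i) index it directly. A minimal model of the resulting layout (SlicedBinary is a made-up struct for illustration, not the Arrow class):

#include <cstdint>
#include <cstdio>
#include <vector>

struct SlicedBinary {
  const int32_t* raw_value_offsets;  // already advanced by the slice offset
  int64_t length;

  int32_t value_offset(int64_t i) const { return raw_value_offsets[i]; }
  int32_t value_length(int64_t i) const {
    return raw_value_offsets[i + 1] - raw_value_offsets[i];
  }
};

int main() {
  // Offsets for the values ["a", "bb", "ccc", "dddd"]; slice off the first element.
  std::vector<int32_t> offsets = {0, 1, 3, 6, 10};
  const int64_t slice_offset = 1;
  SlicedBinary sliced{offsets.data() + slice_offset, /*length=*/3};
  for (int64_t i = 0; i < sliced.length; ++i) {
    std::printf("value %lld: offset=%d length=%d\n", (long long)i, sliced.value_offset(i),
                sliced.value_length(i));
  }
  return 0;
}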
const uint8_t* GetValue(int64_t i, offset_type* out_length) const { - // Account for base offset - i += data_->offset; const offset_type pos = raw_value_offsets_[i]; *out_length = raw_value_offsets_[i + 1] - pos; return raw_data_ + pos; @@ -69,8 +67,6 @@ class BaseBinaryArray : public FlatArray { /// \param i the value index /// \return the view over the selected value std::string_view GetView(int64_t i) const { - // Account for base offset - i += data_->offset; const offset_type pos = raw_value_offsets_[i]; return std::string_view(reinterpret_cast(raw_data_ + pos), raw_value_offsets_[i + 1] - pos); @@ -99,9 +95,7 @@ class BaseBinaryArray : public FlatArray { /// Note that this buffer does not account for any slice offset std::shared_ptr value_data() const { return data_->buffers[2]; } - const offset_type* raw_value_offsets() const { - return raw_value_offsets_ + data_->offset; - } + const offset_type* raw_value_offsets() const { return raw_value_offsets_; } const uint8_t* raw_data() const { return raw_data_; } @@ -109,15 +103,12 @@ class BaseBinaryArray : public FlatArray { /// at the passed index. /// /// Does not perform boundschecking - offset_type value_offset(int64_t i) const { - return raw_value_offsets_[i + data_->offset]; - } + offset_type value_offset(int64_t i) const { return raw_value_offsets_[i]; } /// \brief Return the length of the data for the value at the passed index. /// /// Does not perform boundschecking offset_type value_length(int64_t i) const { - i += data_->offset; return raw_value_offsets_[i + 1] - raw_value_offsets_[i]; } @@ -126,8 +117,7 @@ class BaseBinaryArray : public FlatArray { /// less than the size of the data buffer (data_->buffers[2]). offset_type total_values_length() const { if (data_->length > 0) { - return raw_value_offsets_[data_->length + data_->offset] - - raw_value_offsets_[data_->offset]; + return raw_value_offsets_[data_->length] - raw_value_offsets_[0]; } else { return 0; } @@ -144,7 +134,7 @@ class BaseBinaryArray : public FlatArray { // Protected method for constructors void SetData(const std::shared_ptr& data) { this->Array::SetData(data); - raw_value_offsets_ = data->GetValuesSafe(1, /*offset=*/0); + raw_value_offsets_ = data->GetValuesSafe(1); raw_data_ = data->GetValuesSafe(2, /*offset=*/0); } @@ -293,11 +283,11 @@ class ARROW_EXPORT FixedSizeBinaryArray : public PrimitiveArray { const std::shared_ptr& null_bitmap = NULLPTR, int64_t null_count = kUnknownNullCount, int64_t offset = 0); - const uint8_t* GetValue(int64_t i) const; + const uint8_t* GetValue(int64_t i) const { return values_ + i * byte_width_; } const uint8_t* Value(int64_t i) const { return GetValue(i); } std::string_view GetView(int64_t i) const { - return std::string_view(reinterpret_cast(GetValue(i)), byte_width()); + return std::string_view(reinterpret_cast(GetValue(i)), byte_width_); } std::optional operator[](int64_t i) const { @@ -308,7 +298,7 @@ class ARROW_EXPORT FixedSizeBinaryArray : public PrimitiveArray { int32_t byte_width() const { return byte_width_; } - const uint8_t* raw_values() const { return raw_values_ + data_->offset * byte_width_; } + const uint8_t* raw_values() const { return values_; } IteratorType begin() const { return IteratorType(*this); } @@ -319,8 +309,10 @@ class ARROW_EXPORT FixedSizeBinaryArray : public PrimitiveArray { this->PrimitiveArray::SetData(data); byte_width_ = internal::checked_cast(*type()).byte_width(); + values_ = raw_values_ + data_->offset * byte_width_; } + const uint8_t* values_; int32_t byte_width_; }; diff --git 
a/cpp/src/arrow/array/array_dict.cc b/cpp/src/arrow/array/array_dict.cc index 7fd76a1dae81b..55e086af30bc2 100644 --- a/cpp/src/arrow/array/array_dict.cc +++ b/cpp/src/arrow/array/array_dict.cc @@ -349,7 +349,7 @@ class DictionaryUnifierImpl : public DictionaryUnifier { using MemoTableType = typename DictTraits::MemoTableType; DictionaryUnifierImpl(MemoryPool* pool, std::shared_ptr value_type) - : pool_(pool), value_type_(value_type), memo_table_(pool) {} + : pool_(pool), value_type_(std::move(value_type)), memo_table_(pool) {} Status Unify(const Array& dictionary, std::shared_ptr* out) override { if (dictionary.null_count() > 0) { @@ -432,7 +432,7 @@ struct MakeUnifier { std::unique_ptr result; MakeUnifier(MemoryPool* pool, std::shared_ptr value_type) - : pool(pool), value_type(value_type) {} + : pool(pool), value_type(std::move(value_type)) {} template enable_if_no_memoize Visit(const T&) { diff --git a/cpp/src/arrow/array/array_nested.cc b/cpp/src/arrow/array/array_nested.cc index 47c0fd35829a1..db52551eadc7f 100644 --- a/cpp/src/arrow/array/array_nested.cc +++ b/cpp/src/arrow/array/array_nested.cc @@ -461,8 +461,7 @@ inline void SetListData(VarLengthListLikeArray* self, self->Array::SetData(data); self->list_type_ = checked_cast(data->type.get()); - self->raw_value_offsets_ = - data->GetValuesSafe(1, /*offset=*/0); + self->raw_value_offsets_ = data->GetValuesSafe(1); // BaseListViewArray::SetData takes care of setting raw_value_sizes_. ARROW_CHECK_EQ(self->list_type_->value_type()->id(), data->child_data[0]->type->id()); @@ -542,7 +541,7 @@ Result> ListArray::FromArrays( const Array& offsets, const Array& values, MemoryPool* pool, std::shared_ptr null_bitmap, int64_t null_count) { return ListArrayFromArrays(std::make_shared(values.type()), offsets, - values, pool, null_bitmap, null_count); + values, pool, std::move(null_bitmap), null_count); } Result> ListArray::FromListView(const ListViewArray& source, @@ -563,7 +562,7 @@ Result> ListArray::FromArrays( return Status::TypeError("Mismatching list value type"); } return ListArrayFromArrays(std::move(type), offsets, values, pool, - null_bitmap, null_count); + std::move(null_bitmap), null_count); } Result> ListArray::Flatten(MemoryPool* memory_pool) const { @@ -599,8 +598,8 @@ Result> LargeListArray::FromArrays( const Array& offsets, const Array& values, MemoryPool* pool, std::shared_ptr null_bitmap, int64_t null_count) { return ListArrayFromArrays( - std::make_shared(values.type()), offsets, values, pool, null_bitmap, - null_count); + std::make_shared(values.type()), offsets, values, pool, + std::move(null_bitmap), null_count); } Result> LargeListArray::FromListView( @@ -622,7 +621,7 @@ Result> LargeListArray::FromArrays( return Status::TypeError("Mismatching list value type"); } return ListArrayFromArrays(std::move(type), offsets, values, pool, - null_bitmap, null_count); + std::move(null_bitmap), null_count); } Result> LargeListArray::Flatten(MemoryPool* memory_pool) const { @@ -654,7 +653,7 @@ ListViewArray::ListViewArray(std::shared_ptr type, int64_t length, void ListViewArray::SetData(const std::shared_ptr& data) { internal::SetListData(this, data); - raw_value_sizes_ = data->GetValuesSafe(2, /*offset=*/0); + raw_value_sizes_ = data->GetValuesSafe(2); } Result> ListViewArray::FromArrays( @@ -662,7 +661,7 @@ Result> ListViewArray::FromArrays( std::shared_ptr null_bitmap, int64_t null_count) { return ListViewArrayFromArrays( std::make_shared(values.type()), offsets, sizes, values, pool, - null_bitmap, null_count); + 
std::move(null_bitmap), null_count); } Result> ListViewArray::FromArrays( @@ -677,7 +676,7 @@ Result> ListViewArray::FromArrays( return Status::TypeError("Mismatching list-view value type"); } return ListViewArrayFromArrays(std::move(type), offsets, sizes, values, - pool, null_bitmap, null_count); + pool, std::move(null_bitmap), null_count); } Result> ListViewArray::FromList(const ListArray& source, @@ -722,14 +721,14 @@ LargeListViewArray::LargeListViewArray(std::shared_ptr type, int64_t l std::shared_ptr null_bitmap, int64_t null_count, int64_t offset) { LargeListViewArray::SetData(ArrayData::Make( - type, length, + std::move(type), length, {std::move(null_bitmap), std::move(value_offsets), std::move(value_sizes)}, /*child_data=*/{values->data()}, null_count, offset)); } void LargeListViewArray::SetData(const std::shared_ptr& data) { internal::SetListData(this, data); - raw_value_sizes_ = data->GetValuesSafe(2, /*offset=*/0); + raw_value_sizes_ = data->GetValuesSafe(2); } Result> LargeListViewArray::FromArrays( @@ -737,7 +736,7 @@ Result> LargeListViewArray::FromArrays( std::shared_ptr null_bitmap, int64_t null_count) { return ListViewArrayFromArrays( std::make_shared(values.type()), offsets, sizes, values, pool, - null_bitmap, null_count); + std::move(null_bitmap), null_count); } Result> LargeListViewArray::FromArrays( @@ -752,7 +751,7 @@ Result> LargeListViewArray::FromArrays( return Status::TypeError("Mismatching large list-view value type"); } return ListViewArrayFromArrays( - std::move(type), offsets, sizes, values, pool, null_bitmap, null_count); + std::move(type), offsets, sizes, values, pool, std::move(null_bitmap), null_count); } Result> LargeListViewArray::Flatten( @@ -854,8 +853,9 @@ Result> MapArray::FromArraysInternal( null_count = kUnknownNullCount; } buffers[1] = typed_offsets.values(); - return std::make_shared(type, offsets->length() - 1, std::move(buffers), keys, - items, /*null_count=*/null_count, offsets->offset()); + return std::make_shared(std::move(type), offsets->length() - 1, + std::move(buffers), keys, items, + /*null_count=*/null_count, offsets->offset()); } Result> MapArray::FromArrays(const std::shared_ptr& offsets, @@ -971,8 +971,8 @@ Result> FixedSizeListArray::FromArrays( int64_t length = values->length() / list_size; auto list_type = std::make_shared(values->type(), list_size); - return std::make_shared(list_type, length, values, null_bitmap, - null_count); + return std::make_shared(list_type, length, values, + std::move(null_bitmap), null_count); } Result> FixedSizeListArray::FromArrays( @@ -992,8 +992,8 @@ Result> FixedSizeListArray::FromArrays( } int64_t length = values->length() / list_type.list_size(); - return std::make_shared(type, length, values, null_bitmap, - null_count); + return std::make_shared(std::move(type), length, values, + std::move(null_bitmap), null_count); } Result> FixedSizeListArray::Flatten( @@ -1015,7 +1015,7 @@ StructArray::StructArray(const std::shared_ptr& type, int64_t length, std::shared_ptr null_bitmap, int64_t null_count, int64_t offset) { ARROW_CHECK_EQ(type->id(), Type::STRUCT); - SetData(ArrayData::Make(type, length, {null_bitmap}, null_count, offset)); + SetData(ArrayData::Make(type, length, {std::move(null_bitmap)}, null_count, offset)); for (const auto& child : children) { data_->child_data.push_back(child->data()); } @@ -1048,7 +1048,7 @@ Result> StructArray::Make( null_count = 0; } return std::make_shared(struct_(fields), length - offset, children, - null_bitmap, null_count, offset); + std::move(null_bitmap), 
null_count, offset); } Result> StructArray::Make( @@ -1085,8 +1085,8 @@ const std::shared_ptr& StructArray::field(int i) const { } else { field_data = data_->child_data[i]; } - std::shared_ptr result = MakeArray(field_data); - std::atomic_store(&boxed_fields_[i], result); + result = MakeArray(field_data); + std::atomic_store(&boxed_fields_[i], std::move(result)); return boxed_fields_[i]; } return boxed_fields_[i]; @@ -1183,7 +1183,7 @@ void UnionArray::SetData(std::shared_ptr data) { union_type_ = checked_cast(data_->type.get()); ARROW_CHECK_GE(data_->buffers.size(), 2); - raw_type_codes_ = data->GetValuesSafe(1, /*offset=*/0); + raw_type_codes_ = data->GetValuesSafe(1); boxed_fields_.resize(data_->child_data.size()); } @@ -1205,7 +1205,7 @@ void DenseUnionArray::SetData(const std::shared_ptr& data) { // No validity bitmap ARROW_CHECK_EQ(data_->buffers[0], nullptr); - raw_value_offsets_ = data->GetValuesSafe(2, /*offset=*/0); + raw_value_offsets_ = data->GetValuesSafe(2); } SparseUnionArray::SparseUnionArray(std::shared_ptr data) { diff --git a/cpp/src/arrow/array/array_nested.h b/cpp/src/arrow/array/array_nested.h index a6d4977839ef1..f122f9378b525 100644 --- a/cpp/src/arrow/array/array_nested.h +++ b/cpp/src/arrow/array/array_nested.h @@ -94,15 +94,11 @@ class VarLengthListLikeArray : public Array { const std::shared_ptr& value_type() const { return list_type_->value_type(); } /// Return pointer to raw value offsets accounting for any slice offset - const offset_type* raw_value_offsets() const { - return raw_value_offsets_ + data_->offset; - } + const offset_type* raw_value_offsets() const { return raw_value_offsets_; } // The following functions will not perform boundschecking - offset_type value_offset(int64_t i) const { - return raw_value_offsets_[i + data_->offset]; - } + offset_type value_offset(int64_t i) const { return raw_value_offsets_[i]; } /// \brief Return the size of the value at a particular index /// @@ -154,7 +150,6 @@ class BaseListArray : public VarLengthListLikeArray { /// /// \pre IsValid(i) offset_type value_length(int64_t i) const final { - i += this->data_->offset; return this->raw_value_offsets_[i + 1] - this->raw_value_offsets_[i]; } }; @@ -302,9 +297,7 @@ class BaseListViewArray : public VarLengthListLikeArray { const std::shared_ptr& value_sizes() const { return this->data_->buffers[2]; } /// \brief Return pointer to raw value offsets accounting for any slice offset - const offset_type* raw_value_sizes() const { - return raw_value_sizes_ + this->data_->offset; - } + const offset_type* raw_value_sizes() const { return raw_value_sizes_; } /// \brief Return the size of the value at a particular index /// @@ -313,9 +306,7 @@ class BaseListViewArray : public VarLengthListLikeArray { /// length of the child values array. /// /// \pre IsValid(i) - offset_type value_length(int64_t i) const final { - return this->raw_value_sizes_[i + this->data_->offset]; - } + offset_type value_length(int64_t i) const final { return this->raw_value_sizes_[i]; } protected: const offset_type* raw_value_sizes_ = NULLPTR; @@ -744,15 +735,13 @@ class ARROW_EXPORT UnionArray : public Array { /// Note that this buffer does not account for any slice offset const std::shared_ptr& type_codes() const { return data_->buffers[1]; } - const type_code_t* raw_type_codes() const { return raw_type_codes_ + data_->offset; } + const type_code_t* raw_type_codes() const { return raw_type_codes_; } /// The logical type code of the value at index. 
- type_code_t type_code(int64_t i) const { return raw_type_codes_[i + data_->offset]; } + type_code_t type_code(int64_t i) const { return raw_type_codes_[i]; } /// The physical child id containing value at index. - int child_id(int64_t i) const { - return union_type_->child_ids()[raw_type_codes_[i + data_->offset]]; - } + int child_id(int64_t i) const { return union_type_->child_ids()[raw_type_codes_[i]]; } const UnionType* union_type() const { return union_type_; } @@ -883,9 +872,9 @@ class ARROW_EXPORT DenseUnionArray : public UnionArray { /// Note that this buffer does not account for any slice offset const std::shared_ptr& value_offsets() const { return data_->buffers[2]; } - int32_t value_offset(int64_t i) const { return raw_value_offsets_[i + data_->offset]; } + int32_t value_offset(int64_t i) const { return raw_value_offsets_[i]; } - const int32_t* raw_value_offsets() const { return raw_value_offsets_ + data_->offset; } + const int32_t* raw_value_offsets() const { return raw_value_offsets_; } protected: const int32_t* raw_value_offsets_; diff --git a/cpp/src/arrow/array/array_primitive.cc b/cpp/src/arrow/array/array_primitive.cc index da3810aa392c9..10d4e9e6aa284 100644 --- a/cpp/src/arrow/array/array_primitive.cc +++ b/cpp/src/arrow/array/array_primitive.cc @@ -78,15 +78,16 @@ DayTimeIntervalArray::DayTimeIntervalArray(const std::shared_ptr& type int64_t length, const std::shared_ptr& data, const std::shared_ptr& null_bitmap, - int64_t null_count, int64_t offset) - : PrimitiveArray(type, length, data, null_bitmap, null_count, offset) {} + int64_t null_count, int64_t offset) { + SetData(ArrayData::Make(type, length, {null_bitmap, data}, null_count, offset)); +} DayTimeIntervalArray::DayTimeIntervalArray(int64_t length, const std::shared_ptr& data, const std::shared_ptr& null_bitmap, int64_t null_count, int64_t offset) - : PrimitiveArray(day_time_interval(), length, data, null_bitmap, null_count, offset) { -} + : DayTimeIntervalArray(day_time_interval(), length, data, null_bitmap, null_count, + offset) {} DayTimeIntervalType::DayMilliseconds DayTimeIntervalArray::GetValue(int64_t i) const { DCHECK(i < length()); @@ -105,14 +106,15 @@ MonthDayNanoIntervalArray::MonthDayNanoIntervalArray( MonthDayNanoIntervalArray::MonthDayNanoIntervalArray( const std::shared_ptr& type, int64_t length, const std::shared_ptr& data, const std::shared_ptr& null_bitmap, - int64_t null_count, int64_t offset) - : PrimitiveArray(type, length, data, null_bitmap, null_count, offset) {} + int64_t null_count, int64_t offset) { + SetData(ArrayData::Make(type, length, {null_bitmap, data}, null_count, offset)); +} MonthDayNanoIntervalArray::MonthDayNanoIntervalArray( int64_t length, const std::shared_ptr& data, const std::shared_ptr& null_bitmap, int64_t null_count, int64_t offset) - : PrimitiveArray(month_day_nano_interval(), length, data, null_bitmap, null_count, - offset) {} + : MonthDayNanoIntervalArray(month_day_nano_interval(), length, data, null_bitmap, + null_count, offset) {} MonthDayNanoIntervalType::MonthDayNanos MonthDayNanoIntervalArray::GetValue( int64_t i) const { diff --git a/cpp/src/arrow/array/array_primitive.h b/cpp/src/arrow/array/array_primitive.h index e6df92e3b788c..3e2893b7dd898 100644 --- a/cpp/src/arrow/array/array_primitive.h +++ b/cpp/src/arrow/array/array_primitive.h @@ -90,7 +90,7 @@ class NumericArray : public PrimitiveArray { using value_type = typename TypeClass::c_type; using IteratorType = stl::ArrayIterator>; - explicit NumericArray(const std::shared_ptr& data) : 
PrimitiveArray(data) {} + explicit NumericArray(const std::shared_ptr& data) { SetData(data); } // Only enable this constructor without a type argument for types without additional // metadata @@ -98,18 +98,17 @@ class NumericArray : public PrimitiveArray { NumericArray(enable_if_parameter_free length, const std::shared_ptr& data, const std::shared_ptr& null_bitmap = NULLPTR, - int64_t null_count = kUnknownNullCount, int64_t offset = 0) - : PrimitiveArray(TypeTraits::type_singleton(), length, data, null_bitmap, - null_count, offset) {} - - const value_type* raw_values() const { - return reinterpret_cast(raw_values_) + data_->offset; + int64_t null_count = kUnknownNullCount, int64_t offset = 0) { + SetData(ArrayData::Make(TypeTraits::type_singleton(), length, {null_bitmap, data}, + null_count, offset)); } - value_type Value(int64_t i) const { return raw_values()[i]; } + const value_type* raw_values() const { return values_; } + + value_type Value(int64_t i) const { return values_[i]; } // For API compatibility with BinaryArray etc. - value_type GetView(int64_t i) const { return Value(i); } + value_type GetView(int64_t i) const { return values_[i]; } std::optional operator[](int64_t i) const { return *IteratorType(*this, i); @@ -121,6 +120,15 @@ class NumericArray : public PrimitiveArray { protected: using PrimitiveArray::PrimitiveArray; + + void SetData(const std::shared_ptr& data) { + this->PrimitiveArray::SetData(data); + values_ = raw_values_ + ? (reinterpret_cast(raw_values_) + data_->offset) + : NULLPTR; + } + + const value_type* values_; }; /// DayTimeArray diff --git a/cpp/src/arrow/array/array_test.cc b/cpp/src/arrow/array/array_test.cc index 32806d9d2edb3..73e0c692432b6 100644 --- a/cpp/src/arrow/array/array_test.cc +++ b/cpp/src/arrow/array/array_test.cc @@ -3709,6 +3709,132 @@ TEST(TestSwapEndianArrayData, InvalidLength) { } } +class TestArrayDataStatistics : public ::testing::Test { + public: + void SetUp() { + valids_ = {1, 0, 1, 1}; + null_count_ = std::count(valids_.begin(), valids_.end(), 0); + null_buffer_ = *internal::BytesToBits(valids_); + values_ = {1, 0, 3, -4}; + min_ = *std::min_element(values_.begin(), values_.end()); + max_ = *std::max_element(values_.begin(), values_.end()); + values_buffer_ = Buffer::FromVector(values_); + data_ = ArrayData::Make(int32(), values_.size(), {null_buffer_, values_buffer_}, + null_count_); + data_->statistics = std::make_shared(); + data_->statistics->null_count = null_count_; + data_->statistics->min = min_; + data_->statistics->is_min_exact = true; + data_->statistics->max = max_; + data_->statistics->is_max_exact = true; + } + + protected: + std::vector valids_; + size_t null_count_; + std::shared_ptr null_buffer_; + std::vector values_; + int64_t min_; + int64_t max_; + std::shared_ptr values_buffer_; + std::shared_ptr data_; +}; + +TEST_F(TestArrayDataStatistics, MoveConstructor) { + ArrayData copied_data(*data_); + ArrayData moved_data(std::move(copied_data)); + + ASSERT_TRUE(moved_data.statistics->null_count.has_value()); + ASSERT_EQ(null_count_, moved_data.statistics->null_count.value()); + + ASSERT_TRUE(moved_data.statistics->min.has_value()); + ASSERT_TRUE(std::holds_alternative(moved_data.statistics->min.value())); + ASSERT_EQ(min_, std::get(moved_data.statistics->min.value())); + ASSERT_TRUE(moved_data.statistics->is_min_exact); + + ASSERT_TRUE(moved_data.statistics->max.has_value()); + ASSERT_TRUE(std::holds_alternative(moved_data.statistics->max.value())); + ASSERT_EQ(max_, std::get(moved_data.statistics->max.value())); 
+ ASSERT_TRUE(moved_data.statistics->is_max_exact); +} + +TEST_F(TestArrayDataStatistics, CopyConstructor) { + ArrayData copied_data(*data_); + + ASSERT_TRUE(copied_data.statistics->null_count.has_value()); + ASSERT_EQ(null_count_, copied_data.statistics->null_count.value()); + + ASSERT_TRUE(copied_data.statistics->min.has_value()); + ASSERT_TRUE(std::holds_alternative(copied_data.statistics->min.value())); + ASSERT_EQ(min_, std::get(copied_data.statistics->min.value())); + ASSERT_TRUE(copied_data.statistics->is_min_exact); + + ASSERT_TRUE(copied_data.statistics->max.has_value()); + ASSERT_TRUE(std::holds_alternative(copied_data.statistics->max.value())); + ASSERT_EQ(max_, std::get(copied_data.statistics->max.value())); + ASSERT_TRUE(copied_data.statistics->is_max_exact); +} + +TEST_F(TestArrayDataStatistics, MoveAssignment) { + ArrayData copied_data(*data_); + ArrayData moved_data; + moved_data = std::move(copied_data); + + ASSERT_TRUE(moved_data.statistics->null_count.has_value()); + ASSERT_EQ(null_count_, moved_data.statistics->null_count.value()); + + ASSERT_TRUE(moved_data.statistics->min.has_value()); + ASSERT_TRUE(std::holds_alternative(moved_data.statistics->min.value())); + ASSERT_EQ(min_, std::get(moved_data.statistics->min.value())); + ASSERT_TRUE(moved_data.statistics->is_min_exact); + + ASSERT_TRUE(moved_data.statistics->max.has_value()); + ASSERT_TRUE(std::holds_alternative(moved_data.statistics->max.value())); + ASSERT_EQ(max_, std::get(moved_data.statistics->max.value())); + ASSERT_TRUE(moved_data.statistics->is_max_exact); +} + +TEST_F(TestArrayDataStatistics, CopyAssignment) { + ArrayData copied_data; + copied_data = *data_; + + ASSERT_TRUE(copied_data.statistics->null_count.has_value()); + ASSERT_EQ(null_count_, copied_data.statistics->null_count.value()); + + ASSERT_TRUE(copied_data.statistics->min.has_value()); + ASSERT_TRUE(std::holds_alternative(copied_data.statistics->min.value())); + ASSERT_EQ(min_, std::get(copied_data.statistics->min.value())); + ASSERT_TRUE(copied_data.statistics->is_min_exact); + + ASSERT_TRUE(copied_data.statistics->max.has_value()); + ASSERT_TRUE(std::holds_alternative(copied_data.statistics->max.value())); + ASSERT_EQ(max_, std::get(copied_data.statistics->max.value())); + ASSERT_TRUE(copied_data.statistics->is_max_exact); +} + +TEST_F(TestArrayDataStatistics, CopyTo) { + ASSERT_OK_AND_ASSIGN(auto copied_data, + data_->CopyTo(arrow::default_cpu_memory_manager())); + + ASSERT_TRUE(copied_data->statistics->null_count.has_value()); + ASSERT_EQ(null_count_, copied_data->statistics->null_count.value()); + + ASSERT_TRUE(copied_data->statistics->min.has_value()); + ASSERT_TRUE(std::holds_alternative(copied_data->statistics->min.value())); + ASSERT_EQ(min_, std::get(copied_data->statistics->min.value())); + ASSERT_TRUE(copied_data->statistics->is_min_exact); + + ASSERT_TRUE(copied_data->statistics->max.has_value()); + ASSERT_TRUE(std::holds_alternative(copied_data->statistics->max.value())); + ASSERT_EQ(max_, std::get(copied_data->statistics->max.value())); + ASSERT_TRUE(copied_data->statistics->is_max_exact); +} + +TEST_F(TestArrayDataStatistics, Slice) { + auto sliced_data = data_->Slice(0, 1); + ASSERT_FALSE(sliced_data->statistics); +} + template class TestPrimitiveArray : public ::testing::Test { public: diff --git a/cpp/src/arrow/array/builder_binary.h b/cpp/src/arrow/array/builder_binary.h index d825f7d32520a..442e4a26320a2 100644 --- a/cpp/src/arrow/array/builder_binary.h +++ b/cpp/src/arrow/array/builder_binary.h @@ -500,9 +500,9 @@ class 
ARROW_EXPORT StringHeapBuilder { ARROW_RETURN_NOT_OK(Reserve(length)); } - auto v = - util::ToBinaryView(value, static_cast(length), - static_cast(blocks_.size() - 1), current_offset_); + auto v = util::ToNonInlineBinaryView(value, static_cast(length), + static_cast(blocks_.size() - 1), + current_offset_); memcpy(current_out_buffer_, value, static_cast(length)); current_out_buffer_ += length; diff --git a/cpp/src/arrow/array/concatenate.cc b/cpp/src/arrow/array/concatenate.cc index 87e55246c78fe..b4638dd6593d8 100644 --- a/cpp/src/arrow/array/concatenate.cc +++ b/cpp/src/arrow/array/concatenate.cc @@ -75,6 +75,31 @@ struct Bitmap { bool AllSet() const { return data == nullptr; } }; +enum class OffsetBufferOpOutcome { + kOk, + kOffsetOverflow, +}; + +Status OffsetOverflowStatus() { + return Status::Invalid("offset overflow while concatenating arrays"); +} + +#define RETURN_IF_NOT_OK_OUTCOME(outcome) \ + switch (outcome) { \ + case OffsetBufferOpOutcome::kOk: \ + break; \ + case OffsetBufferOpOutcome::kOffsetOverflow: \ + return OffsetOverflowStatus(); \ + } + +struct ErrorHints { + /// \brief Suggested cast to avoid overflow during concatenation. + /// + /// If the concatenation of offsets overflows, this field might be set to + /// a type that uses larger offsets (e.g. large_utf8, large_list). + std::shared_ptr<DataType> suggested_cast; +}; + // Allocate a buffer and concatenate bitmaps into it. Status ConcatenateBitmaps(const std::vector& bitmaps, MemoryPool* pool, std::shared_ptr* out) { @@ -112,15 +137,16 @@ int64_t SumBufferSizesInBytes(const BufferVector& buffers) { // Write offsets in src into dst, adjusting them such that first_offset // will be the first offset written. template -Status PutOffsets(const Buffer& src, Offset first_offset, Offset* dst, - Range* values_range); +Result<OffsetBufferOpOutcome> PutOffsets(const Buffer& src, Offset first_offset, + Offset* dst, Range* values_range); // Concatenate buffers holding offsets into a single buffer of offsets, // also computing the ranges of values spanned by each buffer of offsets.
template -Status ConcatenateOffsets(const BufferVector& buffers, MemoryPool* pool, - std::shared_ptr* out, - std::vector* values_ranges) { +Result ConcatenateOffsets(const BufferVector& buffers, + MemoryPool* pool, + std::shared_ptr* out, + std::vector* values_ranges) { values_ranges->resize(buffers.size()); // allocate output buffer @@ -133,26 +159,30 @@ Status ConcatenateOffsets(const BufferVector& buffers, MemoryPool* pool, for (size_t i = 0; i < buffers.size(); ++i) { // the first offset from buffers[i] will be adjusted to values_length // (the cumulative length of values spanned by offsets in previous buffers) - RETURN_NOT_OK(PutOffsets(*buffers[i], values_length, - out_data + elements_length, &(*values_ranges)[i])); + ARROW_ASSIGN_OR_RAISE(auto outcome, PutOffsets(*buffers[i], values_length, + out_data + elements_length, + &(*values_ranges)[i])); + if (ARROW_PREDICT_FALSE(outcome != OffsetBufferOpOutcome::kOk)) { + return outcome; + } elements_length += buffers[i]->size() / sizeof(Offset); values_length += static_cast((*values_ranges)[i].length); } // the final element in out_data is the length of all values spanned by the offsets out_data[out_size_in_bytes / sizeof(Offset)] = values_length; - return Status::OK(); + return OffsetBufferOpOutcome::kOk; } template -Status PutOffsets(const Buffer& src, Offset first_offset, Offset* dst, - Range* values_range) { +Result PutOffsets(const Buffer& src, Offset first_offset, + Offset* dst, Range* values_range) { if (src.size() == 0) { // It's allowed to have an empty offsets buffer for a 0-length array // (see Array::Validate) values_range->offset = 0; values_range->length = 0; - return Status::OK(); + return OffsetBufferOpOutcome::kOk; } // Get the range of offsets to transfer from src @@ -162,8 +192,9 @@ Status PutOffsets(const Buffer& src, Offset first_offset, Offset* dst, // Compute the range of values which is spanned by this range of offsets values_range->offset = src_begin[0]; values_range->length = *src_end - values_range->offset; - if (first_offset > std::numeric_limits::max() - values_range->length) { - return Status::Invalid("offset overflow while concatenating arrays"); + if (ARROW_PREDICT_FALSE(first_offset > + std::numeric_limits::max() - values_range->length)) { + return OffsetBufferOpOutcome::kOffsetOverflow; } // Write offsets into dst, ensuring that the first offset written is @@ -175,12 +206,14 @@ Status PutOffsets(const Buffer& src, Offset first_offset, Offset* dst, std::transform(src_begin, src_end, dst, [displacement](Offset offset) { return SafeSignedAdd(offset, displacement); }); - return Status::OK(); + return OffsetBufferOpOutcome::kOk; } template -Status PutListViewOffsets(const ArrayData& input, offset_type* sizes, const Buffer& src, - offset_type displacement, offset_type* dst); +Result PutListViewOffsets(const ArrayData& input, + offset_type* sizes, const Buffer& src, + offset_type displacement, + offset_type* dst); // Concatenate buffers holding list-view offsets into a single buffer of offsets // @@ -198,10 +231,10 @@ Status PutListViewOffsets(const ArrayData& input, offset_type* sizes, const Buff // \param[in] in The child arrays // \param[in,out] sizes The concatenated sizes buffer template -Status ConcatenateListViewOffsets(const ArrayDataVector& in, offset_type* sizes, - const BufferVector& offset_buffers, - const std::vector& value_ranges, - MemoryPool* pool, std::shared_ptr* out) { +Result ConcatenateListViewOffsets( + const ArrayDataVector& in, offset_type* sizes, const BufferVector& offset_buffers, + 
const std::vector& value_ranges, MemoryPool* pool, + std::shared_ptr* out) { DCHECK_EQ(offset_buffers.size(), value_ranges.size()); // Allocate resulting offsets buffer and initialize it with zeros @@ -216,26 +249,32 @@ Status ConcatenateListViewOffsets(const ArrayDataVector& in, offset_type* sizes, for (size_t i = 0; i < offset_buffers.size(); ++i) { const auto displacement = static_cast(num_child_values - value_ranges[i].offset); - RETURN_NOT_OK(PutListViewOffsets(*in[i], /*sizes=*/sizes + elements_length, - /*src=*/*offset_buffers[i], displacement, - /*dst=*/out_offsets + elements_length)); + ARROW_ASSIGN_OR_RAISE(auto outcome, + PutListViewOffsets(*in[i], /*sizes=*/sizes + elements_length, + /*src=*/*offset_buffers[i], displacement, + /*dst=*/out_offsets + elements_length)); + if (ARROW_PREDICT_FALSE(outcome != OffsetBufferOpOutcome::kOk)) { + return outcome; + } elements_length += offset_buffers[i]->size() / sizeof(offset_type); num_child_values += value_ranges[i].length; if (num_child_values > std::numeric_limits::max()) { - return Status::Invalid("offset overflow while concatenating arrays"); + return OffsetBufferOpOutcome::kOffsetOverflow; } } DCHECK_EQ(elements_length, static_cast(out_size_in_bytes / sizeof(offset_type))); - return Status::OK(); + return OffsetBufferOpOutcome::kOk; } template -Status PutListViewOffsets(const ArrayData& input, offset_type* sizes, const Buffer& src, - offset_type displacement, offset_type* dst) { +Result PutListViewOffsets(const ArrayData& input, + offset_type* sizes, const Buffer& src, + offset_type displacement, + offset_type* dst) { if (src.size() == 0) { - return Status::OK(); + return OffsetBufferOpOutcome::kOk; } const auto& validity_buffer = input.buffers[0]; if (validity_buffer) { @@ -291,7 +330,7 @@ Status PutListViewOffsets(const ArrayData& input, offset_type* sizes, const Buff } } } - return Status::OK(); + return OffsetBufferOpOutcome::kOk; } class ConcatenateImpl { @@ -316,11 +355,17 @@ class ConcatenateImpl { } } - Status Concatenate(std::shared_ptr* out) && { + Status Concatenate(std::shared_ptr* out, ErrorHints* out_hints) && { if (out_->null_count != 0 && internal::may_have_validity_bitmap(out_->type->id())) { RETURN_NOT_OK(ConcatenateBitmaps(Bitmaps(0), pool_, &out_->buffers[0])); } - RETURN_NOT_OK(VisitTypeInline(*out_->type, this)); + auto status = VisitTypeInline(*out_->type, this); + if (!status.ok()) { + if (out_hints) { + out_hints->suggested_cast = std::move(suggested_cast_); + } + return status; + } *out = std::move(out_); return Status::OK(); } @@ -337,11 +382,29 @@ class ConcatenateImpl { return ConcatenateBuffers(buffers, pool_).Value(&out_->buffers[1]); } - Status Visit(const BinaryType&) { + Status Visit(const BinaryType& input_type) { std::vector value_ranges; ARROW_ASSIGN_OR_RAISE(auto index_buffers, Buffers(1, sizeof(int32_t))); - RETURN_NOT_OK(ConcatenateOffsets(index_buffers, pool_, &out_->buffers[1], - &value_ranges)); + ARROW_ASSIGN_OR_RAISE( + auto outcome, ConcatenateOffsets(index_buffers, pool_, &out_->buffers[1], + &value_ranges)); + switch (outcome) { + case OffsetBufferOpOutcome::kOk: + break; + case OffsetBufferOpOutcome::kOffsetOverflow: + switch (input_type.id()) { + case Type::BINARY: + suggested_cast_ = large_binary(); + break; + case Type::STRING: + suggested_cast_ = large_utf8(); + break; + default: + DCHECK(false) << "unexpected type id from BinaryType: " << input_type; + break; + } + return OffsetOverflowStatus(); + } ARROW_ASSIGN_OR_RAISE(auto value_buffers, Buffers(2, value_ranges)); return 
ConcatenateBuffers(value_buffers, pool_).Value(&out_->buffers[2]); } @@ -349,8 +412,10 @@ class ConcatenateImpl { Status Visit(const LargeBinaryType&) { std::vector value_ranges; ARROW_ASSIGN_OR_RAISE(auto index_buffers, Buffers(1, sizeof(int64_t))); - RETURN_NOT_OK(ConcatenateOffsets(index_buffers, pool_, &out_->buffers[1], - &value_ranges)); + ARROW_ASSIGN_OR_RAISE( + auto outcome, ConcatenateOffsets(index_buffers, pool_, &out_->buffers[1], + &value_ranges)); + RETURN_IF_NOT_OK_OUTCOME(outcome); ARROW_ASSIGN_OR_RAISE(auto value_buffers, Buffers(2, value_ranges)); return ConcatenateBuffers(value_buffers, pool_).Value(&out_->buffers[2]); } @@ -394,22 +459,44 @@ class ConcatenateImpl { return Status::OK(); } - Status Visit(const ListType&) { + Status Visit(const ListType& input_type) { std::vector value_ranges; ARROW_ASSIGN_OR_RAISE(auto index_buffers, Buffers(1, sizeof(int32_t))); - RETURN_NOT_OK(ConcatenateOffsets(index_buffers, pool_, &out_->buffers[1], - &value_ranges)); + ARROW_ASSIGN_OR_RAISE(auto offsets_outcome, + ConcatenateOffsets(index_buffers, pool_, + &out_->buffers[1], &value_ranges)); + switch (offsets_outcome) { + case OffsetBufferOpOutcome::kOk: + break; + case OffsetBufferOpOutcome::kOffsetOverflow: + suggested_cast_ = large_list(input_type.value_type()); + return OffsetOverflowStatus(); + } ARROW_ASSIGN_OR_RAISE(auto child_data, ChildData(0, value_ranges)); - return ConcatenateImpl(child_data, pool_).Concatenate(&out_->child_data[0]); + ErrorHints child_error_hints; + auto status = ConcatenateImpl(child_data, pool_) + .Concatenate(&out_->child_data[0], &child_error_hints); + if (!status.ok() && child_error_hints.suggested_cast) { + suggested_cast_ = list(std::move(child_error_hints.suggested_cast)); + } + return status; } Status Visit(const LargeListType&) { std::vector value_ranges; ARROW_ASSIGN_OR_RAISE(auto index_buffers, Buffers(1, sizeof(int64_t))); - RETURN_NOT_OK(ConcatenateOffsets(index_buffers, pool_, &out_->buffers[1], - &value_ranges)); + ARROW_ASSIGN_OR_RAISE( + auto outcome, ConcatenateOffsets(index_buffers, pool_, &out_->buffers[1], + &value_ranges)); + RETURN_IF_NOT_OK_OUTCOME(outcome); ARROW_ASSIGN_OR_RAISE(auto child_data, ChildData(0, value_ranges)); - return ConcatenateImpl(child_data, pool_).Concatenate(&out_->child_data[0]); + ErrorHints child_error_hints; + auto status = ConcatenateImpl(child_data, pool_) + .Concatenate(&out_->child_data[0], &child_error_hints); + if (!status.ok() && child_error_hints.suggested_cast) { + suggested_cast_ = large_list(std::move(child_error_hints.suggested_cast)); + } + return status; } template @@ -430,8 +517,17 @@ class ConcatenateImpl { } // Concatenate the values + ErrorHints child_error_hints; ARROW_ASSIGN_OR_RAISE(ArrayDataVector value_data, ChildData(0, value_ranges)); - RETURN_NOT_OK(ConcatenateImpl(value_data, pool_).Concatenate(&out_->child_data[0])); + auto values_status = ConcatenateImpl(value_data, pool_) + .Concatenate(&out_->child_data[0], &child_error_hints); + if (!values_status.ok()) { + if (child_error_hints.suggested_cast) { + suggested_cast_ = std::make_shared>( + std::move(child_error_hints.suggested_cast)); + } + return values_status; + } out_->child_data[0]->type = type.value_type(); // Concatenate the sizes first @@ -440,22 +536,39 @@ class ConcatenateImpl { // Concatenate the offsets ARROW_ASSIGN_OR_RAISE(auto offset_buffers, Buffers(1, sizeof(offset_type))); - RETURN_NOT_OK(ConcatenateListViewOffsets( - in_, /*sizes=*/out_->buffers[2]->mutable_data_as(), offset_buffers, - value_ranges, pool_, 
&out_->buffers[1])); - + ARROW_ASSIGN_OR_RAISE( + auto outcome, ConcatenateListViewOffsets( + in_, /*sizes=*/out_->buffers[2]->mutable_data_as(), + offset_buffers, value_ranges, pool_, &out_->buffers[1])); + switch (outcome) { + case OffsetBufferOpOutcome::kOk: + break; + case OffsetBufferOpOutcome::kOffsetOverflow: + if constexpr (T::type_id == Type::LIST_VIEW) { + suggested_cast_ = large_list_view(type.value_type()); + } + return OffsetOverflowStatus(); + } return Status::OK(); } - Status Visit(const FixedSizeListType& fixed_size_list) { - ARROW_ASSIGN_OR_RAISE(auto child_data, ChildData(0, fixed_size_list.list_size())); - return ConcatenateImpl(child_data, pool_).Concatenate(&out_->child_data[0]); + Status Visit(const FixedSizeListType& fsl_type) { + ARROW_ASSIGN_OR_RAISE(auto child_data, ChildData(0, fsl_type.list_size())); + ErrorHints hints; + auto status = + ConcatenateImpl(child_data, pool_).Concatenate(&out_->child_data[0], &hints); + if (!status.ok() && hints.suggested_cast) { + suggested_cast_ = + fixed_size_list(std::move(hints.suggested_cast), fsl_type.list_size()); + } + return status; } Status Visit(const StructType& s) { for (int i = 0; i < s.num_fields(); ++i) { ARROW_ASSIGN_OR_RAISE(auto child_data, ChildData(i)); - RETURN_NOT_OK(ConcatenateImpl(child_data, pool_).Concatenate(&out_->child_data[i])); + RETURN_NOT_OK(ConcatenateImpl(child_data, pool_) + .Concatenate(&out_->child_data[i], /*hints=*/nullptr)); } return Status::OK(); } @@ -570,8 +683,8 @@ class ConcatenateImpl { case UnionMode::SPARSE: { for (int i = 0; i < u.num_fields(); i++) { ARROW_ASSIGN_OR_RAISE(auto child_data, ChildData(i)); - RETURN_NOT_OK( - ConcatenateImpl(child_data, pool_).Concatenate(&out_->child_data[i])); + RETURN_NOT_OK(ConcatenateImpl(child_data, pool_) + .Concatenate(&out_->child_data[i], /*hints=*/nullptr)); } break; } @@ -581,8 +694,8 @@ class ConcatenateImpl { for (size_t j = 0; j < in_.size(); j++) { child_data[j] = in_[j]->child_data[i]; } - RETURN_NOT_OK( - ConcatenateImpl(child_data, pool_).Concatenate(&out_->child_data[i])); + RETURN_NOT_OK(ConcatenateImpl(child_data, pool_) + .Concatenate(&out_->child_data[i], /*hints=*/nullptr)); } break; } @@ -666,7 +779,8 @@ class ConcatenateImpl { storage_data[i]->type = e.storage_type(); } std::shared_ptr out_storage; - RETURN_NOT_OK(ConcatenateImpl(storage_data, pool_).Concatenate(&out_storage)); + RETURN_NOT_OK(ConcatenateImpl(storage_data, pool_) + .Concatenate(&out_storage, /*hints=*/nullptr)); out_storage->type = in_[0]->type; out_ = std::move(out_storage); return Status::OK(); @@ -797,11 +911,18 @@ class ConcatenateImpl { const ArrayDataVector& in_; MemoryPool* pool_; std::shared_ptr out_; + std::shared_ptr suggested_cast_; }; } // namespace -Result> Concatenate(const ArrayVector& arrays, MemoryPool* pool) { +namespace internal { + +Result> Concatenate( + const ArrayVector& arrays, MemoryPool* pool, + std::shared_ptr* out_suggested_cast) { + DCHECK(out_suggested_cast); + *out_suggested_cast = nullptr; if (arrays.size() == 0) { return Status::Invalid("Must pass at least one array"); } @@ -818,8 +939,31 @@ Result> Concatenate(const ArrayVector& arrays, MemoryPool } std::shared_ptr out_data; - RETURN_NOT_OK(ConcatenateImpl(data, pool).Concatenate(&out_data)); + ErrorHints hints; + auto status = ConcatenateImpl(data, pool).Concatenate(&out_data, &hints); + if (!status.ok()) { + if (hints.suggested_cast) { + DCHECK(status.IsInvalid()); + *out_suggested_cast = std::move(hints.suggested_cast); + } + return status; + } return 
MakeArray(std::move(out_data)); } +} // namespace internal + +Result> Concatenate(const ArrayVector& arrays, MemoryPool* pool) { + std::shared_ptr suggested_cast; + auto result = internal::Concatenate(arrays, pool, &suggested_cast); + if (!result.ok() && suggested_cast && arrays.size() > 0) { + DCHECK(result.status().IsInvalid()); + return Status::Invalid(result.status().message(), ", consider casting input from `", + *arrays[0]->type(), "` to `", *suggested_cast, "` first."); + } + return result; +} + +#undef RETURN_IF_NOT_OK_OUTCOME + } // namespace arrow diff --git a/cpp/src/arrow/array/concatenate.h b/cpp/src/arrow/array/concatenate.h index e7597aad812c4..aada5624d63a3 100644 --- a/cpp/src/arrow/array/concatenate.h +++ b/cpp/src/arrow/array/concatenate.h @@ -24,6 +24,22 @@ #include "arrow/util/visibility.h" namespace arrow { +namespace internal { + +/// \brief Concatenate arrays +/// +/// \param[in] arrays a vector of arrays to be concatenated +/// \param[in] pool memory to store the result will be allocated from this memory pool +/// \param[out] out_suggested_cast if a non-OK Result is returned, the function might set +/// out_suggested_cast to a cast suggestion that would allow concatenating the arrays +/// without overflow of offsets (e.g. string to large_string) +/// +/// \return the concatenated array +ARROW_EXPORT +Result> Concatenate(const ArrayVector& arrays, MemoryPool* pool, + std::shared_ptr* out_suggested_cast); + +} // namespace internal /// \brief Concatenate arrays /// diff --git a/cpp/src/arrow/array/concatenate_test.cc b/cpp/src/arrow/array/concatenate_test.cc index af595e897f9ee..aea5311575299 100644 --- a/cpp/src/arrow/array/concatenate_test.cc +++ b/cpp/src/arrow/array/concatenate_test.cc @@ -29,6 +29,7 @@ #include #include +#include #include #include "arrow/array.h" @@ -42,6 +43,7 @@ #include "arrow/testing/util.h" #include "arrow/type.h" #include "arrow/util/list_util.h" +#include "arrow/util/unreachable.h" namespace arrow { @@ -661,14 +663,103 @@ TEST_F(ConcatenateTest, ExtensionType) { }); } +std::shared_ptr LargeVersionOfType(const std::shared_ptr& type) { + switch (type->id()) { + case Type::BINARY: + return large_binary(); + case Type::STRING: + return large_utf8(); + case Type::LIST: + return large_list(static_cast(*type).value_type()); + case Type::LIST_VIEW: + return large_list_view(static_cast(*type).value_type()); + case Type::LARGE_BINARY: + case Type::LARGE_STRING: + case Type::LARGE_LIST: + case Type::LARGE_LIST_VIEW: + return type; + default: + Unreachable(); + } +} + +std::shared_ptr fixed_size_list_of_1(std::shared_ptr type) { + return fixed_size_list(std::move(type), 1); +} + TEST_F(ConcatenateTest, OffsetOverflow) { - auto fake_long = ArrayFromJSON(utf8(), "[\"\"]"); - fake_long->data()->GetMutableValues(1)[1] = + using TypeFactory = std::shared_ptr (*)(std::shared_ptr); + static const std::vector kNestedTypeFactories = { + list, large_list, list_view, large_list_view, fixed_size_list_of_1, + }; + + auto* pool = default_memory_pool(); + std::shared_ptr suggested_cast; + for (auto& ty : {binary(), utf8()}) { + auto large_ty = LargeVersionOfType(ty); + + auto fake_long = ArrayFromJSON(ty, "[\"\"]"); + fake_long->data()->GetMutableValues(1)[1] = + std::numeric_limits::max(); + // XXX: since the data fake_long claims to own isn't there, this would + // segfault if Concatenate didn't detect overflow and raise an error. 
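A sketch of how a caller might use the new internal::Concatenate overload together with the suggested cast it reports. Illustration only: the helper name ConcatenateOrUpcast and the use of compute::Cast for the retry are assumptions, not part of this patch (and internal::Concatenate is, as the namespace says, an internal API).

#include "arrow/api.h"
#include "arrow/array/concatenate.h"
#include "arrow/compute/api.h"

// Try to concatenate; on offset overflow, cast the inputs to the suggested
// larger-offset type (e.g. utf8 -> large_utf8) and retry once.
arrow::Result<std::shared_ptr<arrow::Array>> ConcatenateOrUpcast(
    const arrow::ArrayVector& arrays, arrow::MemoryPool* pool) {
  std::shared_ptr<arrow::DataType> suggested_cast;
  auto result = arrow::internal::Concatenate(arrays, pool, &suggested_cast);
  if (result.ok() || !suggested_cast) {
    return result;
  }
  arrow::ArrayVector casted(arrays.size());
  for (size_t i = 0; i < arrays.size(); ++i) {
    ARROW_ASSIGN_OR_RAISE(arrow::Datum datum,
                          arrow::compute::Cast(arrays[i], suggested_cast));
    casted[i] = datum.make_array();
  }
  return arrow::Concatenate(casted, pool);
}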
+ auto concatenate_status = Concatenate({fake_long, fake_long}); + EXPECT_RAISES_WITH_MESSAGE_THAT( + Invalid, + ::testing::StrEq("Invalid: offset overflow while concatenating arrays, " + "consider casting input from `" + + ty->ToString() + "` to `large_" + ty->ToString() + "` first."), + concatenate_status); + + concatenate_status = + internal::Concatenate({fake_long, fake_long}, pool, &suggested_cast); + // The message doesn't contain the suggested cast type when the caller + // asks for it by passing the output parameter. + EXPECT_RAISES_WITH_MESSAGE_THAT( + Invalid, ::testing::StrEq("Invalid: offset overflow while concatenating arrays"), + concatenate_status); + ASSERT_TRUE(large_ty->Equals(*suggested_cast)); + + // Check that the suggested cast is correct when concatenation + // fails due to the child array being too large. + for (auto factory : kNestedTypeFactories) { + auto nested_ty = factory(ty); + auto expected_suggestion = factory(large_ty); + auto fake_long_list = ArrayFromJSON(nested_ty, "[[\"\"]]"); + fake_long_list->data()->child_data[0] = fake_long->data(); + + ASSERT_RAISES(Invalid, internal::Concatenate({fake_long_list, fake_long_list}, pool, + &suggested_cast) + .status()); + ASSERT_TRUE(suggested_cast->Equals(*expected_suggestion)); + } + } + + auto list_ty = list(utf8()); + auto fake_long_list = ArrayFromJSON(list_ty, "[[\"Hello\"]]"); + fake_long_list->data()->GetMutableValues<int32_t>(1)[1] = std::numeric_limits<int32_t>::max(); - std::shared_ptr concatenated; - // XX since the data fake_long claims to own isn't there, this will segfault if - // Concatenate doesn't detect overflow and raise an error. - ASSERT_RAISES(Invalid, Concatenate({fake_long, fake_long}).status()); + ASSERT_RAISES(Invalid, internal::Concatenate({fake_long_list, fake_long_list}, pool, + &suggested_cast) + .status()); + ASSERT_TRUE(suggested_cast->Equals(LargeVersionOfType(list_ty))); + + auto list_view_ty = list_view(null()); + auto fake_long_list_view = ArrayFromJSON(list_view_ty, "[[], []]"); + { + constexpr int kInt32Max = std::numeric_limits<int32_t>::max(); + auto* values = fake_long_list_view->data()->child_data[0].get(); + auto* mutable_offsets = fake_long_list_view->data()->GetMutableValues<int32_t>(1); + auto* mutable_sizes = fake_long_list_view->data()->GetMutableValues<int32_t>(2); + values->length = 2 * static_cast<int64_t>(kInt32Max); + mutable_offsets[1] = kInt32Max; + mutable_offsets[0] = kInt32Max; + mutable_sizes[0] = kInt32Max; + } + ASSERT_RAISES(Invalid, internal::Concatenate({fake_long_list_view, fake_long_list_view}, + pool, &suggested_cast) + .status()); + ASSERT_TRUE(suggested_cast->Equals(LargeVersionOfType(list_view_ty))); } TEST_F(ConcatenateTest, DictionaryConcatenateWithEmptyUint16) { diff --git a/cpp/src/arrow/array/data.cc b/cpp/src/arrow/array/data.cc index 83eeb56c496cf..8e29297a8c175 100644 --- a/cpp/src/arrow/array/data.cc +++ b/cpp/src/arrow/array/data.cc @@ -165,6 +165,8 @@ Result> CopyToImpl(const ArrayData& data, ARROW_ASSIGN_OR_RAISE(output->dictionary, CopyToImpl(*data.dictionary, to, copy_fn)); } + output->statistics = data.statistics; + return output; } } // namespace @@ -195,6 +197,7 @@ std::shared_ptr ArrayData::Slice(int64_t off, int64_t len) const { } else { copy->null_count = null_count != 0 ?
kUnknownNullCount : 0; } + copy->statistics = nullptr; return copy; } diff --git a/cpp/src/arrow/array/data.h b/cpp/src/arrow/array/data.h index e0508fe6980a7..1e6ee9a1d32ff 100644 --- a/cpp/src/arrow/array/data.h +++ b/cpp/src/arrow/array/data.h @@ -24,6 +24,7 @@ #include #include +#include "arrow/array/statistics.h" #include "arrow/buffer.h" #include "arrow/result.h" #include "arrow/type.h" @@ -152,7 +153,8 @@ struct ARROW_EXPORT ArrayData { offset(other.offset), buffers(std::move(other.buffers)), child_data(std::move(other.child_data)), - dictionary(std::move(other.dictionary)) { + dictionary(std::move(other.dictionary)), + statistics(std::move(other.statistics)) { SetNullCount(other.null_count); } @@ -163,7 +165,8 @@ struct ARROW_EXPORT ArrayData { offset(other.offset), buffers(other.buffers), child_data(other.child_data), - dictionary(other.dictionary) { + dictionary(other.dictionary), + statistics(other.statistics) { SetNullCount(other.null_count); } @@ -176,6 +179,7 @@ struct ARROW_EXPORT ArrayData { buffers = std::move(other.buffers); child_data = std::move(other.child_data); dictionary = std::move(other.dictionary); + statistics = std::move(other.statistics); return *this; } @@ -188,6 +192,7 @@ struct ARROW_EXPORT ArrayData { buffers = other.buffers; child_data = other.child_data; dictionary = other.dictionary; + statistics = other.statistics; return *this; } @@ -274,6 +279,18 @@ struct ARROW_EXPORT ArrayData { } /// \brief Construct a zero-copy slice of the data with the given offset and length + /// + /// The associated `ArrayStatistics` is always discarded in a sliced + /// `ArrayData`, because statistics computed for the original + /// `ArrayData` may no longer be valid for the slice. If you want to + /// reuse the statistics of the original `ArrayData`, you need to do + /// so yourself. + /// + /// Even if the specified slice covers the same range as the original + /// `ArrayData` (so its statistics would still be valid), the + /// associated `ArrayStatistics` is discarded. Use `Copy()` instead + /// in that case. std::shared_ptr Slice(int64_t offset, int64_t length) const; /// \brief Input-checking variant of Slice @@ -390,6 +407,9 @@ struct ARROW_EXPORT ArrayData { // The dictionary for this Array, if any. Only used for dictionary type std::shared_ptr dictionary; + + // The statistics for this Array. + std::shared_ptr<ArrayStatistics> statistics; }; /// \brief A non-owning Buffer reference diff --git a/cpp/src/arrow/array/statistics.cc b/cpp/src/arrow/array/statistics.cc new file mode 100644 index 0000000000000..b661c9fbaffed --- /dev/null +++ b/cpp/src/arrow/array/statistics.cc @@ -0,0 +1,21 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License.
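A small sketch of the slicing behavior documented in data.h above: statistics attached to an ArrayData are preserved by Copy() (and by the copy/move constructors), but always dropped by Slice(). Illustration only; the function name is hypothetical.

#include <cassert>
#include <memory>
#include "arrow/api.h"

void SliceDropsStatistics(const std::shared_ptr<arrow::Array>& array) {
  auto data = array->data()->Copy();
  data->statistics = std::make_shared<arrow::ArrayStatistics>();
  data->statistics->null_count = array->null_count();

  auto copied = data->Copy();                  // the statistics pointer is carried over
  auto sliced = data->Slice(0, data->length);  // statistics are always discarded

  assert(copied->statistics != nullptr);
  assert(sliced->statistics == nullptr);
}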
+ +// This empty .cc file is for embedding non-inlined symbols of +// arrow::ArrayStatistics into libarrow. + +#include "arrow/array/statistics.h" diff --git a/cpp/src/arrow/array/statistics.h b/cpp/src/arrow/array/statistics.h new file mode 100644 index 0000000000000..523f877bbe429 --- /dev/null +++ b/cpp/src/arrow/array/statistics.h @@ -0,0 +1,69 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include <cstdint> +#include <optional> +#include <string> +#include <variant> + +#include "arrow/util/visibility.h" + +namespace arrow { + +/// \brief Statistics for an Array +/// +/// The Apache Arrow format itself doesn't carry statistics, but a data +/// source such as Apache Parquet may provide them. Statistics associated +/// with a data source can be read through a unified API via this class. +struct ARROW_EXPORT ArrayStatistics { + using ValueType = std::variant<bool, int64_t, uint64_t, double, std::string>; + + /// \brief The number of null values, may not be set + std::optional<int64_t> null_count = std::nullopt; + + /// \brief The number of distinct values, may not be set + std::optional<int64_t> distinct_count = std::nullopt; + + /// \brief The minimum value, may not be set + std::optional<ValueType> min = std::nullopt; + + /// \brief Whether the minimum value is exact or not + bool is_min_exact = false; + + /// \brief The maximum value, may not be set + std::optional<ValueType> max = std::nullopt; + + /// \brief Whether the maximum value is exact or not + bool is_max_exact = false; + + /// \brief Check two statistics for equality + bool Equals(const ArrayStatistics& other) const { + return null_count == other.null_count && distinct_count == other.distinct_count && + min == other.min && is_min_exact == other.is_min_exact && max == other.max && + is_max_exact == other.is_max_exact; + } + + /// \brief Check two statistics for equality + bool operator==(const ArrayStatistics& other) const { return Equals(other); } + + /// \brief Check two statistics for inequality + bool operator!=(const ArrayStatistics& other) const { return !Equals(other); } +}; + +} // namespace arrow diff --git a/cpp/src/arrow/array/statistics_test.cc b/cpp/src/arrow/array/statistics_test.cc new file mode 100644 index 0000000000000..cf15a5d382978 --- /dev/null +++ b/cpp/src/arrow/array/statistics_test.cc @@ -0,0 +1,101 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License.
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include + +#include "arrow/array/statistics.h" + +namespace arrow { + +TEST(ArrayStatisticsTest, TestNullCount) { + ArrayStatistics statistics; + ASSERT_FALSE(statistics.null_count.has_value()); + statistics.null_count = 29; + ASSERT_TRUE(statistics.null_count.has_value()); + ASSERT_EQ(29, statistics.null_count.value()); +} + +TEST(ArrayStatisticsTest, TestDistinctCount) { + ArrayStatistics statistics; + ASSERT_FALSE(statistics.distinct_count.has_value()); + statistics.distinct_count = 29; + ASSERT_TRUE(statistics.distinct_count.has_value()); + ASSERT_EQ(29, statistics.distinct_count.value()); +} + +TEST(ArrayStatisticsTest, TestMin) { + ArrayStatistics statistics; + ASSERT_FALSE(statistics.min.has_value()); + ASSERT_FALSE(statistics.is_min_exact); + statistics.min = static_cast(29); + statistics.is_min_exact = true; + ASSERT_TRUE(statistics.min.has_value()); + ASSERT_TRUE(std::holds_alternative(statistics.min.value())); + ASSERT_EQ(29, std::get(statistics.min.value())); + ASSERT_TRUE(statistics.is_min_exact); +} + +TEST(ArrayStatisticsTest, TestMax) { + ArrayStatistics statistics; + ASSERT_FALSE(statistics.max.has_value()); + ASSERT_FALSE(statistics.is_max_exact); + statistics.max = std::string("hello"); + statistics.is_max_exact = false; + ASSERT_TRUE(statistics.max.has_value()); + ASSERT_TRUE(std::holds_alternative(statistics.max.value())); + ASSERT_EQ("hello", std::get(statistics.max.value())); + ASSERT_FALSE(statistics.is_max_exact); +} + +TEST(ArrayStatisticsTest, TestEquality) { + ArrayStatistics statistics1; + ArrayStatistics statistics2; + + ASSERT_EQ(statistics1, statistics2); + + statistics1.null_count = 29; + ASSERT_NE(statistics1, statistics2); + statistics2.null_count = 29; + ASSERT_EQ(statistics1, statistics2); + + statistics1.distinct_count = 2929; + ASSERT_NE(statistics1, statistics2); + statistics2.distinct_count = 2929; + ASSERT_EQ(statistics1, statistics2); + + statistics1.min = std::string("world"); + ASSERT_NE(statistics1, statistics2); + statistics2.min = std::string("world"); + ASSERT_EQ(statistics1, statistics2); + + statistics1.is_min_exact = true; + ASSERT_NE(statistics1, statistics2); + statistics2.is_min_exact = true; + ASSERT_EQ(statistics1, statistics2); + + statistics1.max = static_cast(-29); + ASSERT_NE(statistics1, statistics2); + statistics2.max = static_cast(-29); + ASSERT_EQ(statistics1, statistics2); + + statistics1.is_max_exact = true; + ASSERT_NE(statistics1, statistics2); + statistics2.is_max_exact = true; + ASSERT_EQ(statistics1, statistics2); +} + +} // namespace arrow diff --git a/cpp/src/arrow/array/validate.cc b/cpp/src/arrow/array/validate.cc index 0d940d3bc869e..69f1646054f4c 100644 --- a/cpp/src/arrow/array/validate.cc +++ b/cpp/src/arrow/array/validate.cc @@ -985,10 +985,22 @@ Status ValidateArrayFull(const Array& array) { return ValidateArrayFull(*array.d ARROW_EXPORT Status ValidateUTF8(const ArrayData& data) { - DCHECK(data.type->id() == Type::STRING || data.type->id() == Type::STRING_VIEW || - data.type->id() == Type::LARGE_STRING); - UTF8DataValidator validator{data}; - return VisitTypeInline(*data.type, 
&validator); + const auto& storage_type = + (data.type->id() == Type::EXTENSION) + ? checked_cast(*data.type).storage_type() + : data.type; + DCHECK(storage_type->id() == Type::STRING || storage_type->id() == Type::STRING_VIEW || + storage_type->id() == Type::LARGE_STRING); + + if (data.type->id() == Type::EXTENSION) { + ArrayData ext_data(data); + ext_data.type = storage_type; + UTF8DataValidator validator{ext_data}; + return VisitTypeInline(*storage_type, &validator); + } else { + UTF8DataValidator validator{data}; + return VisitTypeInline(*storage_type, &validator); + } } ARROW_EXPORT diff --git a/cpp/src/arrow/builder_benchmark.cc b/cpp/src/arrow/builder_benchmark.cc index 84f27d20ee038..8ec7373a1de1f 100644 --- a/cpp/src/arrow/builder_benchmark.cc +++ b/cpp/src/arrow/builder_benchmark.cc @@ -150,6 +150,44 @@ static void BuildBinaryArray(benchmark::State& state) { // NOLINT non-const ref state.SetItemsProcessed(state.iterations() * kItemsProcessed); } +static void BuildInlineBinaryViewArray( + benchmark::State& state) { // NOLINT non-const reference + std::string_view kBinaryStrings[] = {"1", "12345678", "12345", "123456789", + "12", "", " "}; + + for (auto _ : state) { + BinaryViewBuilder builder(memory_tracker.memory_pool()); + + for (int64_t i = 0; i < kRounds * kNumberOfElements; i++) { + ABORT_NOT_OK(builder.Append(kBinaryStrings[i % 7])); + } + + std::shared_ptr out; + ABORT_NOT_OK(builder.Finish(&out)); + } + + state.SetBytesProcessed(state.iterations() * kBytesProcessed); + state.SetItemsProcessed(state.iterations() * kItemsProcessed); +} + +static void BuildNonInlineBinaryViewArray( + benchmark::State& state) { // NOLINT non-const reference + const char* kLargeBinaryString = "12345678901234567890123456789012345678901234567890"; + for (auto _ : state) { + BinaryViewBuilder builder(memory_tracker.memory_pool()); + + for (int64_t i = 0; i < kRounds * kNumberOfElements; i++) { + ABORT_NOT_OK(builder.Append(kLargeBinaryString)); + } + + std::shared_ptr out; + ABORT_NOT_OK(builder.Finish(&out)); + } + + state.SetBytesProcessed(state.iterations() * kBytesProcessed); + state.SetItemsProcessed(state.iterations() * kItemsProcessed); +} + static void BuildChunkedBinaryArray( benchmark::State& state) { // NOLINT non-const reference // 1MB chunks @@ -458,6 +496,8 @@ BENCHMARK(BuildBinaryArray); BENCHMARK(BuildChunkedBinaryArray); BENCHMARK(BuildFixedSizeBinaryArray); BENCHMARK(BuildDecimalArray); +BENCHMARK(BuildInlineBinaryViewArray); +BENCHMARK(BuildNonInlineBinaryViewArray); BENCHMARK(BuildInt64DictionaryArrayRandom); BENCHMARK(BuildInt64DictionaryArraySequential); diff --git a/cpp/src/arrow/c/abi.h b/cpp/src/arrow/c/abi.h index 6abe866b5f6f6..db051fff5ff05 100644 --- a/cpp/src/arrow/c/abi.h +++ b/cpp/src/arrow/c/abi.h @@ -41,11 +41,11 @@ extern "C" { #endif #ifndef ARROW_C_DATA_INTERFACE -#define ARROW_C_DATA_INTERFACE +# define ARROW_C_DATA_INTERFACE -#define ARROW_FLAG_DICTIONARY_ORDERED 1 -#define ARROW_FLAG_NULLABLE 2 -#define ARROW_FLAG_MAP_KEYS_SORTED 4 +# define ARROW_FLAG_DICTIONARY_ORDERED 1 +# define ARROW_FLAG_NULLABLE 2 +# define ARROW_FLAG_MAP_KEYS_SORTED 4 struct ArrowSchema { // Array type description @@ -83,7 +83,7 @@ struct ArrowArray { #endif // ARROW_C_DATA_INTERFACE #ifndef ARROW_C_DEVICE_DATA_INTERFACE -#define ARROW_C_DEVICE_DATA_INTERFACE +# define ARROW_C_DEVICE_DATA_INTERFACE // Spec and Documentation: https://arrow.apache.org/docs/format/CDeviceDataInterface.html @@ -91,33 +91,33 @@ struct ArrowArray { typedef int32_t ArrowDeviceType; // CPU device, same as 
using ArrowArray directly -#define ARROW_DEVICE_CPU 1 +# define ARROW_DEVICE_CPU 1 // CUDA GPU Device -#define ARROW_DEVICE_CUDA 2 +# define ARROW_DEVICE_CUDA 2 // Pinned CUDA CPU memory by cudaMallocHost -#define ARROW_DEVICE_CUDA_HOST 3 +# define ARROW_DEVICE_CUDA_HOST 3 // OpenCL Device -#define ARROW_DEVICE_OPENCL 4 +# define ARROW_DEVICE_OPENCL 4 // Vulkan buffer for next-gen graphics -#define ARROW_DEVICE_VULKAN 7 +# define ARROW_DEVICE_VULKAN 7 // Metal for Apple GPU -#define ARROW_DEVICE_METAL 8 +# define ARROW_DEVICE_METAL 8 // Verilog simulator buffer -#define ARROW_DEVICE_VPI 9 +# define ARROW_DEVICE_VPI 9 // ROCm GPUs for AMD GPUs -#define ARROW_DEVICE_ROCM 10 +# define ARROW_DEVICE_ROCM 10 // Pinned ROCm CPU memory allocated by hipMallocHost -#define ARROW_DEVICE_ROCM_HOST 11 +# define ARROW_DEVICE_ROCM_HOST 11 // Reserved for extension -#define ARROW_DEVICE_EXT_DEV 12 +# define ARROW_DEVICE_EXT_DEV 12 // CUDA managed/unified memory allocated by cudaMallocManaged -#define ARROW_DEVICE_CUDA_MANAGED 13 +# define ARROW_DEVICE_CUDA_MANAGED 13 // unified shared memory allocated on a oneAPI non-partitioned device. -#define ARROW_DEVICE_ONEAPI 14 +# define ARROW_DEVICE_ONEAPI 14 // GPU support for next-gen WebGPU standard -#define ARROW_DEVICE_WEBGPU 15 +# define ARROW_DEVICE_WEBGPU 15 // Qualcomm Hexagon DSP -#define ARROW_DEVICE_HEXAGON 16 +# define ARROW_DEVICE_HEXAGON 16 struct ArrowDeviceArray { // the Allocated Array @@ -138,7 +138,7 @@ struct ArrowDeviceArray { #endif // ARROW_C_DEVICE_DATA_INTERFACE #ifndef ARROW_C_STREAM_INTERFACE -#define ARROW_C_STREAM_INTERFACE +# define ARROW_C_STREAM_INTERFACE struct ArrowArrayStream { // Callback to get the stream type @@ -179,7 +179,7 @@ struct ArrowArrayStream { #endif // ARROW_C_STREAM_INTERFACE #ifndef ARROW_C_DEVICE_STREAM_INTERFACE -#define ARROW_C_DEVICE_STREAM_INTERFACE +# define ARROW_C_DEVICE_STREAM_INTERFACE // Equivalent to ArrowArrayStream, but for ArrowDeviceArrays. // diff --git a/cpp/src/arrow/c/bridge_test.cc b/cpp/src/arrow/c/bridge_test.cc index 09bb524adbdf0..01fd56f631d99 100644 --- a/cpp/src/arrow/c/bridge_test.cc +++ b/cpp/src/arrow/c/bridge_test.cc @@ -48,7 +48,7 @@ // TODO(GH-37221): Remove these ifdef checks when compute dependency is removed #ifdef ARROW_COMPUTE -#include "arrow/compute/api_vector.h" +# include "arrow/compute/api_vector.h" #endif namespace arrow { diff --git a/cpp/src/arrow/c/dlpack_abi.h b/cpp/src/arrow/c/dlpack_abi.h index 4af557a7ed5d7..fbe2a56a344b3 100644 --- a/cpp/src/arrow/c/dlpack_abi.h +++ b/cpp/src/arrow/c/dlpack_abi.h @@ -12,9 +12,9 @@ * \brief Compatibility with C++ */ #ifdef __cplusplus -#define DLPACK_EXTERN_C extern "C" +# define DLPACK_EXTERN_C extern "C" #else -#define DLPACK_EXTERN_C +# define DLPACK_EXTERN_C #endif /*! \brief The current major version of dlpack */ @@ -25,13 +25,13 @@ /*! 
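The ARROW_DEVICE_* constants above are what a producer writes into the device_type field of ArrowDeviceArray. A hedged sketch of a CPU-side producer follows; the struct fields match the published C Device Data Interface, but the device_id value used for plain host memory is an assumption here, so check the spec before relying on it.

#include <cstring>

#include "arrow/c/abi.h"

// Wrap an already-exported ArrowArray in the device-aware envelope for data
// that lives in ordinary host memory.
void WrapAsCpuDeviceArray(struct ArrowArray* exported, struct ArrowDeviceArray* out) {
  std::memset(out, 0, sizeof(*out));    // zeroes sync_event and the reserved bytes
  out->array = *exported;               // bitwise move of the C data interface payload
  exported->release = nullptr;          // mark the source as moved
  out->device_id = -1;                  // assumption: no meaningful ordinal for host memory
  out->device_type = ARROW_DEVICE_CPU;  // constant defined above
}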
\brief DLPACK_DLL prefix for windows */ #ifdef _WIN32 -#ifdef DLPACK_EXPORTS -#define DLPACK_DLL __declspec(dllexport) +# ifdef DLPACK_EXPORTS +# define DLPACK_DLL __declspec(dllexport) +# else +# define DLPACK_DLL __declspec(dllimport) +# endif #else -#define DLPACK_DLL __declspec(dllimport) -#endif -#else -#define DLPACK_DLL +# define DLPACK_DLL #endif #include diff --git a/cpp/src/arrow/chunk_resolver.cc b/cpp/src/arrow/chunk_resolver.cc index 55eec53ced1c7..854127480744e 100644 --- a/cpp/src/arrow/chunk_resolver.cc +++ b/cpp/src/arrow/chunk_resolver.cc @@ -60,42 +60,38 @@ inline std::vector MakeChunksOffsets(const std::vector& chunks) { template void ResolveManyInline(size_t num_offsets, const int64_t* signed_offsets, int64_t n_indices, const IndexType* logical_index_vec, - IndexType* out_chunk_index_vec, IndexType chunk_hint, - IndexType* out_index_in_chunk_vec) { + TypedChunkLocation* out_chunk_location_vec, + IndexType chunk_hint) { auto* offsets = reinterpret_cast(signed_offsets); const auto num_chunks = static_cast(num_offsets - 1); // chunk_hint in [0, num_offsets) per the precondition. for (int64_t i = 0; i < n_indices; i++) { - const auto index = static_cast(logical_index_vec[i]); + auto typed_logical_index = logical_index_vec[i]; + const auto index = static_cast(typed_logical_index); + // use or update chunk_hint if (index >= offsets[chunk_hint] && (chunk_hint == num_chunks || index < offsets[chunk_hint + 1])) { - out_chunk_index_vec[i] = chunk_hint; // hint is correct! - continue; + // hint is correct! + } else { + // lo < hi is guaranteed by `num_offsets = chunks.size() + 1` + auto chunk_index = + ChunkResolver::Bisect(index, offsets, /*lo=*/0, /*hi=*/num_offsets); + chunk_hint = static_cast(chunk_index); } - // lo < hi is guaranteed by `num_offsets = chunks.size() + 1` - auto chunk_index = - ChunkResolver::Bisect(index, offsets, /*lo=*/0, /*hi=*/num_offsets); - chunk_hint = static_cast(chunk_index); - out_chunk_index_vec[i] = chunk_hint; - } - if (out_index_in_chunk_vec != NULLPTR) { - for (int64_t i = 0; i < n_indices; i++) { - auto logical_index = logical_index_vec[i]; - auto chunk_index = out_chunk_index_vec[i]; - // chunk_index is in [0, chunks.size()] no matter what the - // value of logical_index is, so it's always safe to dereference - // offset_ as it contains chunks.size()+1 values. - out_index_in_chunk_vec[i] = - logical_index - static_cast(offsets[chunk_index]); + out_chunk_location_vec[i].chunk_index = chunk_hint; + // chunk_index is in [0, chunks.size()] no matter what the + // value of logical_index is, so it's always safe to dereference + // offset_ as it contains chunks.size()+1 values. + out_chunk_location_vec[i].index_in_chunk = + typed_logical_index - static_cast(offsets[chunk_hint]); #if defined(ARROW_VALGRIND) || defined(ADDRESS_SANITIZER) - // Make it more likely that Valgrind/ASAN can catch an invalid memory - // access by poisoning out_index_in_chunk_vec[i] when the logical - // index is out-of-bounds. - if (chunk_index == num_chunks) { - out_index_in_chunk_vec[i] = std::numeric_limits::max(); - } -#endif + // Make it more likely that Valgrind/ASAN can catch an invalid memory + // access by poisoning the index-in-chunk value when the logical + // index is out-of-bounds. 
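The dlpack change above only re-nests and indents an existing export/import macro ladder, which is the standard pattern for shared-library symbol visibility. The same idiom written out for a made-up library name (MYLIB_* is hypothetical and not part of this patch):

// Export symbols while building the library, import them in consumers,
// and expand to nothing on non-Windows platforms.
#ifdef _WIN32
#  ifdef MYLIB_EXPORTS
#    define MYLIB_DLL __declspec(dllexport)
#  else
#    define MYLIB_DLL __declspec(dllimport)
#  endif
#else
#  define MYLIB_DLL
#endif

MYLIB_DLL int mylib_answer(void);  // declared with the computed visibility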
+ if (chunk_hint == num_chunks) { + out_chunk_location_vec[i].index_in_chunk = std::numeric_limits::max(); } +#endif } } @@ -130,31 +126,31 @@ ChunkResolver& ChunkResolver::operator=(const ChunkResolver& other) noexcept { } void ChunkResolver::ResolveManyImpl(int64_t n_indices, const uint8_t* logical_index_vec, - uint8_t* out_chunk_index_vec, uint8_t chunk_hint, - uint8_t* out_index_in_chunk_vec) const { + TypedChunkLocation* out_chunk_location_vec, + uint8_t chunk_hint) const { ResolveManyInline(offsets_.size(), offsets_.data(), n_indices, logical_index_vec, - out_chunk_index_vec, chunk_hint, out_index_in_chunk_vec); + out_chunk_location_vec, chunk_hint); } void ChunkResolver::ResolveManyImpl(int64_t n_indices, const uint32_t* logical_index_vec, - uint32_t* out_chunk_index_vec, uint32_t chunk_hint, - uint32_t* out_index_in_chunk_vec) const { + TypedChunkLocation* out_chunk_location_vec, + uint32_t chunk_hint) const { ResolveManyInline(offsets_.size(), offsets_.data(), n_indices, logical_index_vec, - out_chunk_index_vec, chunk_hint, out_index_in_chunk_vec); + out_chunk_location_vec, chunk_hint); } void ChunkResolver::ResolveManyImpl(int64_t n_indices, const uint16_t* logical_index_vec, - uint16_t* out_chunk_index_vec, uint16_t chunk_hint, - uint16_t* out_index_in_chunk_vec) const { + TypedChunkLocation* out_chunk_location_vec, + uint16_t chunk_hint) const { ResolveManyInline(offsets_.size(), offsets_.data(), n_indices, logical_index_vec, - out_chunk_index_vec, chunk_hint, out_index_in_chunk_vec); + out_chunk_location_vec, chunk_hint); } void ChunkResolver::ResolveManyImpl(int64_t n_indices, const uint64_t* logical_index_vec, - uint64_t* out_chunk_index_vec, uint64_t chunk_hint, - uint64_t* out_index_in_chunk_vec) const { + TypedChunkLocation* out_chunk_location_vec, + uint64_t chunk_hint) const { ResolveManyInline(offsets_.size(), offsets_.data(), n_indices, logical_index_vec, - out_chunk_index_vec, chunk_hint, out_index_in_chunk_vec); + out_chunk_location_vec, chunk_hint); } } // namespace arrow::internal diff --git a/cpp/src/arrow/chunk_resolver.h b/cpp/src/arrow/chunk_resolver.h index a2a3d5a864243..83fda62387fe1 100644 --- a/cpp/src/arrow/chunk_resolver.h +++ b/cpp/src/arrow/chunk_resolver.h @@ -31,28 +31,34 @@ namespace arrow::internal { struct ChunkResolver; -struct ChunkLocation { +template +struct TypedChunkLocation { /// \brief Index of the chunk in the array of chunks /// /// The value is always in the range `[0, chunks.size()]`. `chunks.size()` is used /// to represent out-of-bounds locations. 
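The rewritten loop above resolves each logical index by first re-testing the chunk that answered the previous query and only falling back to a binary search over the chunk offsets when that hint misses, so sorted or clustered index sequences resolve in near-constant time per element. A standalone sketch of the same strategy over a plain offsets vector; the upper_bound-based search and the names are illustrative, not the Arrow implementation:

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

struct Location {
  int64_t chunk_index;
  int64_t index_in_chunk;
};

// offsets holds one entry per chunk boundary plus the total length,
// e.g. chunks of lengths {3, 2, 4} -> offsets {0, 3, 5, 9}.
std::vector<Location> ResolveAll(const std::vector<int64_t>& offsets,
                                 const std::vector<int64_t>& logical) {
  const int64_t num_chunks = static_cast<int64_t>(offsets.size()) - 1;
  std::vector<Location> out(logical.size());
  int64_t hint = 0;  // chunk that answered the previous index
  for (std::size_t i = 0; i < logical.size(); ++i) {
    const int64_t index = logical[i];
    const bool hint_hits =
        index >= offsets[hint] && (hint == num_chunks || index < offsets[hint + 1]);
    if (!hint_hits) {
      // Binary search for the last offset <= index (upper_bound, then step back).
      auto it = std::upper_bound(offsets.begin(), offsets.end(), index);
      hint = static_cast<int64_t>(it - offsets.begin()) - 1;
    }
    out[i] = {hint, index - offsets[hint]};
  }
  return out;
}

int main() {
  std::vector<int64_t> offsets{0, 3, 5, 9};
  for (auto loc : ResolveAll(offsets, {0, 1, 4, 8})) {
    std::cout << loc.chunk_index << ":" << loc.index_in_chunk << "\n";  // 0:0 0:1 1:1 2:3
  }
  return 0;
}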
- int64_t chunk_index = 0; + IndexType chunk_index = 0; /// \brief Index of the value in the chunk /// /// The value is UNDEFINED if chunk_index >= chunks.size() - int64_t index_in_chunk = 0; + IndexType index_in_chunk = 0; - ChunkLocation() = default; + TypedChunkLocation() = default; - ChunkLocation(int64_t chunk_index, int64_t index_in_chunk) - : chunk_index(chunk_index), index_in_chunk(index_in_chunk) {} + TypedChunkLocation(IndexType chunk_index, IndexType index_in_chunk) + : chunk_index(chunk_index), index_in_chunk(index_in_chunk) { + static_assert(sizeof(TypedChunkLocation) == 2 * sizeof(IndexType)); + static_assert(alignof(TypedChunkLocation) == alignof(IndexType)); + } - bool operator==(ChunkLocation other) const { + bool operator==(TypedChunkLocation other) const { return chunk_index == other.chunk_index && index_in_chunk == other.index_in_chunk; } }; +using ChunkLocation = TypedChunkLocation; + /// \brief An utility that incrementally resolves logical indices into /// physical indices in a chunked array. struct ARROW_EXPORT ChunkResolver { @@ -144,26 +150,25 @@ struct ARROW_EXPORT ChunkResolver { /// /// \pre 0 <= logical_index_vec[i] < logical_array_length() /// (for well-defined and valid chunk index results) - /// \pre out_chunk_index_vec has space for `n_indices` + /// \pre out_chunk_location_vec has space for `n_indices` locations /// \pre chunk_hint in [0, chunks.size()] - /// \post out_chunk_index_vec[i] in [0, chunks.size()] for i in [0, n) + /// \post out_chunk_location_vec[i].chunk_index in [0, chunks.size()] for i in [0, n) /// \post if logical_index_vec[i] >= chunked_array.length(), then - /// out_chunk_index_vec[i] == chunks.size() - /// and out_index_in_chunk_vec[i] is UNDEFINED (can be out-of-bounds) - /// \post if logical_index_vec[i] < 0, then both out_chunk_index_vec[i] and - /// out_index_in_chunk_vec[i] are UNDEFINED + /// out_chunk_location_vec[i].chunk_index == chunks.size() + /// and out_chunk_location_vec[i].index_in_chunk is UNDEFINED (can be + /// out-of-bounds) + /// \post if logical_index_vec[i] < 0, then both values in out_chunk_index_vec[i] + /// are UNDEFINED /// /// \param n_indices The number of logical indices to resolve /// \param logical_index_vec The logical indices to resolve - /// \param out_chunk_index_vec The output array where the chunk indices will be written + /// \param out_chunk_location_vec The output array where the locations will be written /// \param chunk_hint 0 or the last chunk_index produced by ResolveMany - /// \param out_index_in_chunk_vec If not NULLPTR, the output array where the - /// within-chunk indices will be written /// \return false iff chunks.size() > std::numeric_limits::max() template [[nodiscard]] bool ResolveMany(int64_t n_indices, const IndexType* logical_index_vec, - IndexType* out_chunk_index_vec, IndexType chunk_hint = 0, - IndexType* out_index_in_chunk_vec = NULLPTR) const { + TypedChunkLocation* out_chunk_location_vec, + IndexType chunk_hint = 0) const { if constexpr (sizeof(IndexType) < sizeof(uint64_t)) { // The max value returned by Bisect is `offsets.size() - 1` (= chunks.size()). constexpr uint64_t kMaxIndexTypeValue = std::numeric_limits::max(); @@ -188,13 +193,11 @@ struct ARROW_EXPORT ChunkResolver { // logical index in the chunked array. 
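A usage sketch for the ResolveMany() signature above: the caller now supplies one TypedChunkLocation per logical index instead of two parallel index arrays, and the boolean result reports whether the chunk count fits the chosen index width. This assumes only what the header shows and, being an internal API, may change:

#include <cstdint>
#include <vector>

#include "arrow/chunk_resolver.h"

using arrow::internal::ChunkResolver;
using arrow::internal::TypedChunkLocation;

// Resolve a batch of logical indices into (chunk, index-in-chunk) pairs.
// Returns false if the chunk count does not fit in the 32-bit index type.
bool ResolveBatch(const ChunkResolver& resolver, const std::vector<uint32_t>& logical,
                  std::vector<TypedChunkLocation<uint32_t>>* out) {
  out->resize(logical.size());
  // chunk_hint defaults to 0; pass the last chunk index from a previous call
  // to speed up resolution of consecutive batches.
  return resolver.ResolveMany(static_cast<int64_t>(logical.size()), logical.data(),
                              out->data());
}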
using U = std::make_unsigned_t; ResolveManyImpl(n_indices, reinterpret_cast(logical_index_vec), - reinterpret_cast(out_chunk_index_vec), - static_cast(chunk_hint), - reinterpret_cast(out_index_in_chunk_vec)); + reinterpret_cast*>(out_chunk_location_vec), + static_cast(chunk_hint)); } else { static_assert(std::is_unsigned_v); - ResolveManyImpl(n_indices, logical_index_vec, out_chunk_index_vec, chunk_hint, - out_index_in_chunk_vec); + ResolveManyImpl(n_indices, logical_index_vec, out_chunk_location_vec, chunk_hint); } return true; } @@ -226,10 +229,14 @@ struct ARROW_EXPORT ChunkResolver { /// \pre all the pre-conditions of ChunkResolver::ResolveMany() /// \pre num_offsets - 1 <= std::numeric_limits::max() - void ResolveManyImpl(int64_t, const uint8_t*, uint8_t*, uint8_t, uint8_t*) const; - void ResolveManyImpl(int64_t, const uint16_t*, uint16_t*, uint16_t, uint16_t*) const; - void ResolveManyImpl(int64_t, const uint32_t*, uint32_t*, uint32_t, uint32_t*) const; - void ResolveManyImpl(int64_t, const uint64_t*, uint64_t*, uint64_t, uint64_t*) const; + void ResolveManyImpl(int64_t, const uint8_t*, TypedChunkLocation*, + uint8_t) const; + void ResolveManyImpl(int64_t, const uint16_t*, TypedChunkLocation*, + uint16_t) const; + void ResolveManyImpl(int64_t, const uint32_t*, TypedChunkLocation*, + uint32_t) const; + void ResolveManyImpl(int64_t, const uint64_t*, TypedChunkLocation*, + uint64_t) const; public: /// \brief Find the index of the chunk that contains the logical index. diff --git a/cpp/src/arrow/chunked_array.cc b/cpp/src/arrow/chunked_array.cc index c36b736d5d5df..dd6aa51534fcb 100644 --- a/cpp/src/arrow/chunked_array.cc +++ b/cpp/src/arrow/chunked_array.cc @@ -27,6 +27,7 @@ #include "arrow/array/array_nested.h" #include "arrow/array/util.h" #include "arrow/array/validate.h" +#include "arrow/device_allocation_type_set.h" #include "arrow/pretty_print.h" #include "arrow/status.h" #include "arrow/type.h" @@ -86,6 +87,18 @@ Result> ChunkedArray::MakeEmpty( return std::make_shared(std::move(new_chunks)); } +DeviceAllocationTypeSet ChunkedArray::device_types() const { + if (chunks_.empty()) { + // An empty ChunkedArray is considered to be CPU-only. + return DeviceAllocationTypeSet::CpuOnly(); + } + DeviceAllocationTypeSet set; + for (const auto& chunk : chunks_) { + set.add(chunk->device_type()); + } + return set; +} + bool ChunkedArray::Equals(const ChunkedArray& other, const EqualOptions& opts) const { if (length_ != other.length()) { return false; diff --git a/cpp/src/arrow/chunked_array.h b/cpp/src/arrow/chunked_array.h index 5d300861d85c2..c65b6cb6e227f 100644 --- a/cpp/src/arrow/chunked_array.h +++ b/cpp/src/arrow/chunked_array.h @@ -25,6 +25,7 @@ #include "arrow/chunk_resolver.h" #include "arrow/compare.h" +#include "arrow/device_allocation_type_set.h" #include "arrow/result.h" #include "arrow/status.h" #include "arrow/type_fwd.h" @@ -116,6 +117,13 @@ class ARROW_EXPORT ChunkedArray { /// \return an ArrayVector of chunks const ArrayVector& chunks() const { return chunks_; } + /// \return The set of device allocation types used by the chunks in this + /// chunked array. + DeviceAllocationTypeSet device_types() const; + + /// \return true if all chunks are allocated on CPU-accessible memory. 
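With device_types() and is_cpu() in place, code that only understands host memory can guard itself up front. A small hedged sketch of such a guard; the error message wording is mine, not from the patch:

#include "arrow/chunked_array.h"
#include "arrow/status.h"

// Guard a CPU-only code path: an empty chunked array counts as CPU-resident,
// otherwise every chunk's allocation must be CPU-accessible.
arrow::Status CheckCpuResident(const arrow::ChunkedArray& chunked) {
  if (!chunked.is_cpu()) {
    return arrow::Status::Invalid("chunked array has non-CPU-resident chunks");
  }
  return arrow::Status::OK();
}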
+ bool is_cpu() const { return device_types().is_cpu_only(); } + /// \brief Construct a zero-copy slice of the chunked array with the /// indicated offset and length /// diff --git a/cpp/src/arrow/chunked_array_test.cc b/cpp/src/arrow/chunked_array_test.cc index e9cc283b53cd5..bf9d4af7c7bb0 100644 --- a/cpp/src/arrow/chunked_array_test.cc +++ b/cpp/src/arrow/chunked_array_test.cc @@ -37,6 +37,7 @@ namespace arrow { using internal::ChunkLocation; using internal::ChunkResolver; +using internal::TypedChunkLocation; class TestChunkedArray : public ::testing::Test { protected: @@ -61,12 +62,17 @@ TEST_F(TestChunkedArray, Make) { ChunkedArray::Make({}, int64())); AssertTypeEqual(*int64(), *result->type()); ASSERT_EQ(result->num_chunks(), 0); + // Empty chunked arrays are treated as CPU-allocated. + ASSERT_TRUE(result->is_cpu()); auto chunk0 = ArrayFromJSON(int8(), "[0, 1, 2]"); auto chunk1 = ArrayFromJSON(int16(), "[3, 4, 5]"); ASSERT_OK_AND_ASSIGN(result, ChunkedArray::Make({chunk0, chunk0})); ASSERT_OK_AND_ASSIGN(auto result2, ChunkedArray::Make({chunk0, chunk0}, int8())); + // All chunks are CPU-accessible. + ASSERT_TRUE(result->is_cpu()); + ASSERT_TRUE(result2->is_cpu()); AssertChunkedEqual(*result, *result2); ASSERT_RAISES(TypeError, ChunkedArray::Make({chunk0, chunk1})); @@ -375,24 +381,26 @@ class TestChunkResolverMany : public ::testing::Test { Result> ResolveMany( const ChunkResolver& resolver, const std::vector& logical_index_vec) { const size_t n = logical_index_vec.size(); - std::vector chunk_index_vec; - chunk_index_vec.resize(n); - std::vector index_in_chunk_vec; - index_in_chunk_vec.resize(n); + std::vector> chunk_location_vec; + chunk_location_vec.resize(n); bool valid = resolver.ResolveMany( - static_cast(n), logical_index_vec.data(), chunk_index_vec.data(), 0, - index_in_chunk_vec.data()); + static_cast(n), logical_index_vec.data(), chunk_location_vec.data(), 0); if (ARROW_PREDICT_FALSE(!valid)) { return Status::Invalid("index type doesn't fit possible chunk indexes"); } - std::vector locations; - locations.reserve(n); - for (size_t i = 0; i < n; i++) { - auto chunk_index = static_cast(chunk_index_vec[i]); - auto index_in_chunk = static_cast(index_in_chunk_vec[i]); - locations.emplace_back(chunk_index, index_in_chunk); + if constexpr (std::is_same::value) { + return chunk_location_vec; + } else { + std::vector locations; + locations.reserve(n); + for (size_t i = 0; i < n; i++) { + auto loc = chunk_location_vec[i]; + auto chunk_index = static_cast(loc.chunk_index); + auto index_in_chunk = static_cast(loc.index_in_chunk); + locations.emplace_back(chunk_index, index_in_chunk); + } + return locations; } - return locations; } void CheckResolveMany(const ChunkResolver& resolver, diff --git a/cpp/src/arrow/compute/CMakeLists.txt b/cpp/src/arrow/compute/CMakeLists.txt index e20b45897db95..aa2a2d4e9af0b 100644 --- a/cpp/src/arrow/compute/CMakeLists.txt +++ b/cpp/src/arrow/compute/CMakeLists.txt @@ -92,6 +92,7 @@ add_arrow_test(internals_test key_hash_test.cc row/compare_test.cc row/grouper_test.cc + row/row_encoder_internal_test.cc row/row_test.cc util_internal_test.cc) diff --git a/cpp/src/arrow/compute/expression.cc b/cpp/src/arrow/compute/expression.cc index 33e5928c2865d..12fda5d58f3bf 100644 --- a/cpp/src/arrow/compute/expression.cc +++ b/cpp/src/arrow/compute/expression.cc @@ -23,6 +23,7 @@ #include #include "arrow/chunked_array.h" +#include "arrow/compute/api_aggregate.h" #include "arrow/compute/api_vector.h" #include "arrow/compute/exec_internal.h" #include 
"arrow/compute/expression_internal.h" @@ -1242,6 +1243,72 @@ struct Inequality { /*insert_implicit_casts=*/false, &exec_context); } + /// Simplify an `is_in` call against an inequality guarantee. + /// + /// We avoid the complexity of fully simplifying EQUAL comparisons to true + /// literals (e.g., 'x is_in [1, 2, 3]' given the guarantee 'x = 2') due to + /// potential complications with null matching behavior. This is ok for the + /// predicate pushdown use case because the overall aim is to simplify to an + /// unsatisfiable expression. + /// + /// \pre `is_in_call` is a call to the `is_in` function + /// \return a simplified expression, or nullopt if no simplification occurred + static Result> SimplifyIsIn( + const Inequality& guarantee, const Expression::Call* is_in_call) { + DCHECK_EQ(is_in_call->function_name, "is_in"); + + auto options = checked_pointer_cast(is_in_call->options); + + const auto& lhs = Comparison::StripOrderPreservingCasts(is_in_call->arguments[0]); + if (!lhs.field_ref()) return std::nullopt; + if (*lhs.field_ref() != guarantee.target) return std::nullopt; + + FilterOptions::NullSelectionBehavior null_selection; + switch (options->null_matching_behavior) { + case SetLookupOptions::MATCH: + null_selection = + guarantee.nullable ? FilterOptions::EMIT_NULL : FilterOptions::DROP; + break; + case SetLookupOptions::SKIP: + null_selection = FilterOptions::DROP; + break; + case SetLookupOptions::EMIT_NULL: + if (guarantee.nullable) return std::nullopt; + null_selection = FilterOptions::DROP; + break; + case SetLookupOptions::INCONCLUSIVE: + if (guarantee.nullable) return std::nullopt; + ARROW_ASSIGN_OR_RAISE(Datum is_null, IsNull(options->value_set)); + ARROW_ASSIGN_OR_RAISE(Datum any_null, Any(is_null)); + if (any_null.scalar_as().value) return std::nullopt; + null_selection = FilterOptions::DROP; + break; + } + + std::string func_name = Comparison::GetName(guarantee.cmp); + DCHECK_NE(func_name, "na"); + std::vector args{options->value_set, guarantee.bound}; + ARROW_ASSIGN_OR_RAISE(Datum filter_mask, CallFunction(func_name, args)); + FilterOptions filter_options(null_selection); + ARROW_ASSIGN_OR_RAISE(Datum simplified_value_set, + Filter(options->value_set, filter_mask, filter_options)); + + if (simplified_value_set.length() == 0) return literal(false); + if (simplified_value_set.length() == options->value_set.length()) return std::nullopt; + + ExecContext exec_context; + Expression::Call simplified_call; + simplified_call.function_name = "is_in"; + simplified_call.arguments = is_in_call->arguments; + simplified_call.options = std::make_shared( + simplified_value_set, options->null_matching_behavior); + ARROW_ASSIGN_OR_RAISE( + Expression simplified_expr, + BindNonRecursive(std::move(simplified_call), + /*insert_implicit_casts=*/false, &exec_context)); + return simplified_expr; + } + /// \brief Simplify the given expression given this inequality as a guarantee. Result Simplify(Expression expr) { const auto& guarantee = *this; @@ -1258,6 +1325,12 @@ struct Inequality { return call->function_name == "is_valid" ? 
literal(true) : literal(false); } + if (call->function_name == "is_in") { + ARROW_ASSIGN_OR_RAISE(std::optional result, + SimplifyIsIn(guarantee, call)); + return result.value_or(expr); + } + auto cmp = Comparison::Get(expr); if (!cmp) return expr; diff --git a/cpp/src/arrow/compute/expression_test.cc b/cpp/src/arrow/compute/expression_test.cc index d94a17b6ffadf..0b7e8a9c23b13 100644 --- a/cpp/src/arrow/compute/expression_test.cc +++ b/cpp/src/arrow/compute/expression_test.cc @@ -27,6 +27,7 @@ #include #include +#include "arrow/array/builder_primitive.h" #include "arrow/compute/expression_internal.h" #include "arrow/compute/function_internal.h" #include "arrow/compute/registry.h" @@ -1616,6 +1617,144 @@ TEST(Expression, SimplifyWithComparisonAndNullableCaveat) { true_unless_null(field_ref("i32")))); // not satisfiable, will drop row group } +TEST(Expression, SimplifyIsIn) { + auto is_in = [](Expression field, std::shared_ptr value_set_type, + std::string json_array, + SetLookupOptions::NullMatchingBehavior null_matching_behavior) { + SetLookupOptions options{ArrayFromJSON(value_set_type, json_array), + null_matching_behavior}; + return call("is_in", {field}, options); + }; + + for (SetLookupOptions::NullMatchingBehavior null_matching : { + SetLookupOptions::MATCH, + SetLookupOptions::SKIP, + SetLookupOptions::EMIT_NULL, + SetLookupOptions::INCONCLUSIVE, + }) { + Simplify{is_in(field_ref("i32"), int32(), "[]", null_matching)} + .WithGuarantee(greater(field_ref("i32"), literal(2))) + .Expect(false); + + Simplify{is_in(field_ref("i32"), int32(), "[1,3,5,7,9]", null_matching)} + .WithGuarantee(equal(field_ref("i32"), literal(6))) + .Expect(false); + + Simplify{is_in(field_ref("i32"), int32(), "[1,3,5,7,9]", null_matching)} + .WithGuarantee(greater(field_ref("i32"), literal(3))) + .Expect(is_in(field_ref("i32"), int32(), "[5,7,9]", null_matching)); + + Simplify{is_in(field_ref("i32"), int32(), "[1,3,5,7,9]", null_matching)} + .WithGuarantee(greater(field_ref("i32"), literal(9))) + .Expect(false); + + Simplify{is_in(field_ref("i32"), int32(), "[1,3,5,7,9]", null_matching)} + .WithGuarantee(less_equal(field_ref("i32"), literal(0))) + .Expect(false); + + Simplify{is_in(field_ref("i32"), int32(), "[1,3,5,7,9]", null_matching)} + .WithGuarantee(greater(field_ref("i32"), literal(0))) + .ExpectUnchanged(); + + Simplify{is_in(field_ref("i32"), int32(), "[1,3,5,7,9]", null_matching)} + .WithGuarantee(less_equal(field_ref("i32"), literal(9))) + .ExpectUnchanged(); + + Simplify{is_in(field_ref("i32"), int32(), "[1,3,5,7,9]", null_matching)} + .WithGuarantee(and_(less_equal(field_ref("i32"), literal(7)), + greater(field_ref("i32"), literal(4)))) + .Expect(is_in(field_ref("i32"), int32(), "[5,7]", null_matching)); + + Simplify{is_in(field_ref("u32"), int8(), "[1,3,5,7,9]", null_matching)} + .WithGuarantee(greater(field_ref("u32"), literal(3))) + .Expect(is_in(field_ref("u32"), int8(), "[5,7,9]", null_matching)); + + Simplify{is_in(field_ref("u32"), int64(), "[1,3,5,7,9]", null_matching)} + .WithGuarantee(greater(field_ref("u32"), literal(3))) + .Expect(is_in(field_ref("u32"), int64(), "[5,7,9]", null_matching)); + } + + Simplify{ + is_in(field_ref("i32"), int32(), "[1,2,3]", SetLookupOptions::MATCH), + } + .WithGuarantee( + or_(greater(field_ref("i32"), literal(2)), is_null(field_ref("i32")))) + .Expect(is_in(field_ref("i32"), int32(), "[3]", SetLookupOptions::MATCH)); + + Simplify{ + is_in(field_ref("i32"), int32(), "[1,2,3,null]", SetLookupOptions::MATCH), + } + .WithGuarantee(greater(field_ref("i32"), 
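Stripped of the null-matching cases, the simplification above works by applying the guarantee's comparison to the is_in value set, filtering out values that can never satisfy it, and collapsing the call to literal(false) when nothing survives or leaving it untouched when everything does. A standalone sketch of that idea with plain integers; the Arrow-specific null-selection handling is deliberately omitted:

#include <functional>
#include <iostream>
#include <optional>
#include <vector>

// Reduce an is_in value set under a guarantee such as "x > bound".
// Returns nullopt when no simplification applies (the set is unchanged);
// an empty result means the predicate is unsatisfiable and the caller can
// replace the whole call with a false literal.
std::optional<std::vector<int>> SimplifyValueSet(
    const std::vector<int>& value_set, const std::function<bool(int)>& guarantee) {
  std::vector<int> kept;
  for (int v : value_set) {
    if (guarantee(v)) kept.push_back(v);
  }
  if (kept.size() == value_set.size()) return std::nullopt;  // nothing to do
  return kept;
}

int main() {
  std::vector<int> set{1, 3, 5, 7, 9};
  // Guarantee x > 3 keeps {5, 7, 9}; guarantee x > 9 would keep nothing.
  auto reduced = SimplifyValueSet(set, [](int x) { return x > 3; });
  for (int v : *reduced) std::cout << v << " ";
  std::cout << "\n";
  return 0;
}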
literal(2))) + .Expect(is_in(field_ref("i32"), int32(), "[3]", SetLookupOptions::MATCH)); + + Simplify{ + is_in(field_ref("i32"), int32(), "[1,2,3,null]", SetLookupOptions::MATCH), + } + .WithGuarantee( + or_(greater(field_ref("i32"), literal(2)), is_null(field_ref("i32")))) + .Expect(is_in(field_ref("i32"), int32(), "[3,null]", SetLookupOptions::MATCH)); + + Simplify{ + is_in(field_ref("i32"), int32(), "[1,2,3]", SetLookupOptions::SKIP), + } + .WithGuarantee( + or_(greater(field_ref("i32"), literal(2)), is_null(field_ref("i32")))) + .Expect(is_in(field_ref("i32"), int32(), "[3]", SetLookupOptions::SKIP)); + + Simplify{ + is_in(field_ref("i32"), int32(), "[1,2,3,null]", SetLookupOptions::SKIP), + } + .WithGuarantee(greater(field_ref("i32"), literal(2))) + .Expect(is_in(field_ref("i32"), int32(), "[3]", SetLookupOptions::SKIP)); + + Simplify{ + is_in(field_ref("i32"), int32(), "[1,2,3,null]", SetLookupOptions::SKIP), + } + .WithGuarantee( + or_(greater(field_ref("i32"), literal(2)), is_null(field_ref("i32")))) + .Expect(is_in(field_ref("i32"), int32(), "[3]", SetLookupOptions::SKIP)); + + Simplify{ + is_in(field_ref("i32"), int32(), "[1,2,3]", SetLookupOptions::EMIT_NULL), + } + .WithGuarantee( + or_(greater(field_ref("i32"), literal(2)), is_null(field_ref("i32")))) + .ExpectUnchanged(); + + Simplify{ + is_in(field_ref("i32"), int32(), "[1,2,3,null]", SetLookupOptions::EMIT_NULL), + } + .WithGuarantee(greater(field_ref("i32"), literal(2))) + .Expect(is_in(field_ref("i32"), int32(), "[3]", SetLookupOptions::EMIT_NULL)); + + Simplify{ + is_in(field_ref("i32"), int32(), "[1,2,3,null]", SetLookupOptions::EMIT_NULL), + } + .WithGuarantee( + or_(greater(field_ref("i32"), literal(2)), is_null(field_ref("i32")))) + .ExpectUnchanged(); + + Simplify{ + is_in(field_ref("i32"), int32(), "[1,2,3]", SetLookupOptions::INCONCLUSIVE), + } + .WithGuarantee( + or_(greater(field_ref("i32"), literal(2)), is_null(field_ref("i32")))) + .ExpectUnchanged(); + + Simplify{ + is_in(field_ref("i32"), int32(), "[1,2,3,null]", SetLookupOptions::INCONCLUSIVE), + } + .WithGuarantee(greater(field_ref("i32"), literal(2))) + .ExpectUnchanged(); + + Simplify{ + is_in(field_ref("i32"), int32(), "[1,2,3,null]", SetLookupOptions::INCONCLUSIVE), + } + .WithGuarantee( + or_(greater(field_ref("i32"), literal(2)), is_null(field_ref("i32")))) + .ExpectUnchanged(); +} + TEST(Expression, SimplifyThenExecute) { auto filter = or_({equal(field_ref("f32"), literal(0)), @@ -1643,6 +1782,40 @@ TEST(Expression, SimplifyThenExecute) { AssertDatumsEqual(evaluated, simplified_evaluated, /*verbose=*/true); } +TEST(Expression, SimplifyIsInThenExecute) { + auto input = RecordBatchFromJSON(kBoringSchema, R"([ + {"i64": 2, "i32": 5}, + {"i64": 5, "i32": 6}, + {"i64": 3, "i32": 6}, + {"i64": 3, "i32": 5}, + {"i64": 4, "i32": 5}, + {"i64": 2, "i32": 7}, + {"i64": 5, "i32": 5} + ])"); + + std::vector guarantees{greater(field_ref("i64"), literal(1)), + greater_equal(field_ref("i32"), literal(5)), + less_equal(field_ref("i64"), literal(5))}; + + for (const Expression& guarantee : guarantees) { + auto filter = + call("is_in", {guarantee.call()->arguments[0]}, + compute::SetLookupOptions{ArrayFromJSON(int32(), "[1,2,3]"), true}); + ASSERT_OK_AND_ASSIGN(filter, filter.Bind(*kBoringSchema)); + ASSERT_OK_AND_ASSIGN(auto simplified, SimplifyWithGuarantee(filter, guarantee)); + + Datum evaluated, simplified_evaluated; + ExpectExecute(filter, input, &evaluated); + ExpectExecute(simplified, input, &simplified_evaluated); + if (simplified_evaluated.is_scalar()) { + 
ASSERT_OK_AND_ASSIGN( + simplified_evaluated, + MakeArrayFromScalar(*simplified_evaluated.scalar(), evaluated.length())); + } + AssertDatumsEqual(evaluated, simplified_evaluated, /*verbose=*/true); + } +} + TEST(Expression, Filter) { auto ExpectFilter = [](Expression filter, std::string batch_json) { ASSERT_OK_AND_ASSIGN(auto s, kBoringSchema->AddField(0, field("in", boolean()))); diff --git a/cpp/src/arrow/compute/function.cc b/cpp/src/arrow/compute/function.cc index e1a2e8c5d8879..0478a3d1e801a 100644 --- a/cpp/src/arrow/compute/function.cc +++ b/cpp/src/arrow/compute/function.cc @@ -30,6 +30,7 @@ #include "arrow/compute/kernels/common_internal.h" #include "arrow/compute/registry.h" #include "arrow/datum.h" +#include "arrow/device_allocation_type_set.h" #include "arrow/util/cpu_info.h" #include "arrow/util/logging.h" #include "arrow/util/tracing_internal.h" diff --git a/cpp/src/arrow/compute/kernel.cc b/cpp/src/arrow/compute/kernel.cc index 5c87ef2cd0561..5e7461cc52d0e 100644 --- a/cpp/src/arrow/compute/kernel.cc +++ b/cpp/src/arrow/compute/kernel.cc @@ -24,6 +24,7 @@ #include "arrow/buffer.h" #include "arrow/compute/exec.h" +#include "arrow/device_allocation_type_set.h" #include "arrow/result.h" #include "arrow/type_traits.h" #include "arrow/util/bit_util.h" diff --git a/cpp/src/arrow/compute/kernel.h b/cpp/src/arrow/compute/kernel.h index 1adb3e96c97c8..cfb6265f12904 100644 --- a/cpp/src/arrow/compute/kernel.h +++ b/cpp/src/arrow/compute/kernel.h @@ -31,6 +31,7 @@ #include "arrow/buffer.h" #include "arrow/compute/exec.h" #include "arrow/datum.h" +#include "arrow/device_allocation_type_set.h" #include "arrow/memory_pool.h" #include "arrow/result.h" #include "arrow/status.h" @@ -41,7 +42,7 @@ // macOS defines PREALLOCATE as a preprocessor macro in the header sys/vnode.h. // No other BSD seems to do so. The name is used as an identifier in MemAllocation enum. #if defined(__APPLE__) && defined(PREALLOCATE) -#undef PREALLOCATE +# undef PREALLOCATE #endif namespace arrow { diff --git a/cpp/src/arrow/compute/kernels/aggregate_basic.cc b/cpp/src/arrow/compute/kernels/aggregate_basic.cc index 1fbcd6a249093..b545d8bcc1003 100644 --- a/cpp/src/arrow/compute/kernels/aggregate_basic.cc +++ b/cpp/src/arrow/compute/kernels/aggregate_basic.cc @@ -23,7 +23,9 @@ #include "arrow/util/cpu_info.h" #include "arrow/util/hashing.h" -#include +// Include templated definitions for aggregate kernels that must compiled here +// with the SIMD level configured for this compilation unit in the build. 
+#include "arrow/compute/kernels/aggregate_basic.inc.cc" // NOLINT(build/include) namespace arrow { namespace compute { @@ -276,11 +278,6 @@ struct SumImplDefault : public SumImpl { using SumImpl::SumImpl; }; -template -struct MeanImplDefault : public MeanImpl { - using MeanImpl::MeanImpl; -}; - Result> SumInit(KernelContext* ctx, const KernelInitArgs& args) { SumLikeInit visitor( @@ -289,6 +286,14 @@ Result> SumInit(KernelContext* ctx, return visitor.Create(); } +// ---------------------------------------------------------------------- +// Mean implementation + +template +struct MeanImplDefault : public MeanImpl { + using MeanImpl::MeanImpl; +}; + Result> MeanInit(KernelContext* ctx, const KernelInitArgs& args) { MeanKernelInit visitor( @@ -482,8 +487,8 @@ void AddFirstOrLastAggKernel(ScalarAggregateFunction* func, // ---------------------------------------------------------------------- // MinMax implementation -Result> MinMaxInit(KernelContext* ctx, - const KernelInitArgs& args) { +Result> MinMaxInitDefault(KernelContext* ctx, + const KernelInitArgs& args) { ARROW_ASSIGN_OR_RAISE(TypeHolder out_type, args.kernel->signature->out_type().Resolve(ctx, args.inputs)); MinMaxInitState visitor( @@ -532,13 +537,13 @@ struct BooleanAnyImpl : public ScalarAggregator { } if (batch[0].is_scalar()) { const Scalar& scalar = *batch[0].scalar; - this->has_nulls = !scalar.is_valid; - this->any = scalar.is_valid && checked_cast(scalar).value; - this->count += scalar.is_valid; + this->has_nulls |= !scalar.is_valid; + this->any |= scalar.is_valid && checked_cast(scalar).value; + this->count += scalar.is_valid * batch.length; return Status::OK(); } const ArraySpan& data = batch[0].array; - this->has_nulls = data.GetNullCount() > 0; + this->has_nulls |= data.GetNullCount() > 0; this->count += data.length - data.GetNullCount(); arrow::internal::OptionalBinaryBitBlockCounter counter( data.buffers[0].data, data.offset, data.buffers[1].data, data.offset, @@ -603,13 +608,13 @@ struct BooleanAllImpl : public ScalarAggregator { } if (batch[0].is_scalar()) { const Scalar& scalar = *batch[0].scalar; - this->has_nulls = !scalar.is_valid; - this->count += scalar.is_valid; - this->all = !scalar.is_valid || checked_cast(scalar).value; + this->has_nulls |= !scalar.is_valid; + this->count += scalar.is_valid * batch.length; + this->all &= !scalar.is_valid || checked_cast(scalar).value; return Status::OK(); } const ArraySpan& data = batch[0].array; - this->has_nulls = data.GetNullCount() > 0; + this->has_nulls |= data.GetNullCount() > 0; this->count += data.length - data.GetNullCount(); arrow::internal::OptionalBinaryBitBlockCounter counter( data.buffers[1].data, data.offset, data.buffers[0].data, data.offset, @@ -1114,14 +1119,14 @@ void RegisterScalarAggregateBasic(FunctionRegistry* registry) { // Add min max function func = std::make_shared("min_max", Arity::Unary(), min_max_doc, &default_scalar_aggregate_options); - AddMinMaxKernels(MinMaxInit, {null(), boolean()}, func.get()); - AddMinMaxKernels(MinMaxInit, NumericTypes(), func.get()); - AddMinMaxKernels(MinMaxInit, TemporalTypes(), func.get()); - AddMinMaxKernels(MinMaxInit, BaseBinaryTypes(), func.get()); - AddMinMaxKernel(MinMaxInit, Type::FIXED_SIZE_BINARY, func.get()); - AddMinMaxKernel(MinMaxInit, Type::INTERVAL_MONTHS, func.get()); - AddMinMaxKernel(MinMaxInit, Type::DECIMAL128, func.get()); - AddMinMaxKernel(MinMaxInit, Type::DECIMAL256, func.get()); + AddMinMaxKernels(MinMaxInitDefault, {null(), boolean()}, func.get()); + AddMinMaxKernels(MinMaxInitDefault, 
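The any/all changes above replace plain assignments with |=, &= and a batch-length-scaled count so that state accumulates across every Consume() call instead of reflecting only the most recent batch. A toy aggregator showing why the compound assignments matter when Consume runs more than once; the two-batch scenario and names are illustrative only:

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

// Minimal "any" aggregator over batches of booleans with a separate validity
// vector standing in for nulls.
struct AnyState {
  bool any = false;
  bool has_nulls = false;
  int64_t count = 0;

  void Consume(const std::vector<bool>& values, const std::vector<bool>& valid) {
    for (std::size_t i = 0; i < values.size(); ++i) {
      has_nulls |= !valid[i];        // '=' would forget nulls seen in earlier batches
      any |= valid[i] && values[i];  // '=' would forget earlier true values
      count += valid[i];
    }
  }
};

int main() {
  AnyState state;
  state.Consume({true, false}, {true, true});    // batch 1 contains a true value
  state.Consume({false, false}, {true, false});  // batch 2 has no trues, one null
  assert(state.any);  // with plain '=', batch 2 would have reset this to false
  assert(state.has_nulls);
  assert(state.count == 3);
  return 0;
}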
NumericTypes(), func.get()); + AddMinMaxKernels(MinMaxInitDefault, TemporalTypes(), func.get()); + AddMinMaxKernels(MinMaxInitDefault, BaseBinaryTypes(), func.get()); + AddMinMaxKernel(MinMaxInitDefault, Type::FIXED_SIZE_BINARY, func.get()); + AddMinMaxKernel(MinMaxInitDefault, Type::INTERVAL_MONTHS, func.get()); + AddMinMaxKernel(MinMaxInitDefault, Type::DECIMAL128, func.get()); + AddMinMaxKernel(MinMaxInitDefault, Type::DECIMAL256, func.get()); // Add the SIMD variants for min max #if defined(ARROW_HAVE_RUNTIME_AVX2) if (cpu_info->IsSupported(arrow::internal::CpuInfo::AVX2)) { diff --git a/cpp/src/arrow/compute/kernels/aggregate_basic.inc.cc b/cpp/src/arrow/compute/kernels/aggregate_basic.inc.cc new file mode 100644 index 0000000000000..f2151e0a9e029 --- /dev/null +++ b/cpp/src/arrow/compute/kernels/aggregate_basic.inc.cc @@ -0,0 +1,1025 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// .inc.cc file to be included in compilation unit where kernels are meant to be +// compiled auto-vectorized by the compiler with different SIMD levels passed +// as compiler flags. +// +// It contains no includes to avoid double inclusion in the compilation unit +// that includes this .inc.cc file. 
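The .inc.cc file exists so the same kernel templates can be compiled several times, once per SIMD level, by textually including them into translation units built with different compiler flags. A compilable single-file sketch of that arrangement; in the real build each wrapper would live in its own .cc file, and the function names below are illustrative rather than the actual Arrow symbols:

#include <cstdint>

// ---- contents that would live in the shared .inc.cc -----------------------
// A plain summation loop the optimizer can auto-vectorize according to the
// flags of whichever translation unit includes this definition.
template <typename T>
T SumSpan(const T* values, int64_t n) {
  T sum = 0;
  for (int64_t i = 0; i < n; ++i) sum += values[i];
  return sum;
}

// ---- per-translation-unit wrappers -----------------------------------------
// Each wrapper would normally sit in its own .cc file compiled with its own
// SIMD flags (e.g. baseline vs -mavx2), yielding separately optimized object
// code behind distinct entry points that a runtime dispatcher chooses between.
int64_t SumInt64Baseline(const int64_t* values, int64_t n) { return SumSpan(values, n); }
int64_t SumInt64Avx2(const int64_t* values, int64_t n) { return SumSpan(values, n); }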
+ +#include +#include +#include +#include +#include + +#include "arrow/compute/api_aggregate.h" +#include "arrow/compute/kernels/aggregate_internal.h" +#include "arrow/compute/kernels/codegen_internal.h" +#include "arrow/status.h" +#include "arrow/type.h" +#include "arrow/type_traits.h" +#include "arrow/util/align_util.h" +#include "arrow/util/bit_block_counter.h" +#include "arrow/util/decimal.h" + +namespace arrow::compute::internal { +namespace { + +// ---------------------------------------------------------------------- +// Sum implementation + +template ::Type> +struct SumImpl : public ScalarAggregator { + using ThisType = SumImpl; + using CType = typename TypeTraits::CType; + using SumType = ResultType; + using SumCType = typename TypeTraits::CType; + using OutputType = typename TypeTraits::ScalarType; + + SumImpl(std::shared_ptr out_type, ScalarAggregateOptions options_) + : out_type(std::move(out_type)), options(std::move(options_)) {} + + Status Consume(KernelContext*, const ExecSpan& batch) override { + if (batch[0].is_array()) { + const ArraySpan& data = batch[0].array; + this->count += data.length - data.GetNullCount(); + this->nulls_observed = this->nulls_observed || data.GetNullCount(); + + if (!options.skip_nulls && this->nulls_observed) { + // Short-circuit + return Status::OK(); + } + + if (is_boolean_type::value) { + this->sum += GetTrueCount(data); + } else { + this->sum += SumArray(data); + } + } else { + const Scalar& data = *batch[0].scalar; + this->count += data.is_valid * batch.length; + this->nulls_observed = this->nulls_observed || !data.is_valid; + if (data.is_valid) { + this->sum += internal::UnboxScalar::Unbox(data) * batch.length; + } + } + return Status::OK(); + } + + Status MergeFrom(KernelContext*, KernelState&& src) override { + const auto& other = checked_cast(src); + this->count += other.count; + this->sum += other.sum; + this->nulls_observed = this->nulls_observed || other.nulls_observed; + return Status::OK(); + } + + Status Finalize(KernelContext*, Datum* out) override { + if ((!options.skip_nulls && this->nulls_observed) || + (this->count < options.min_count)) { + out->value = std::make_shared(out_type); + } else { + out->value = std::make_shared(this->sum, out_type); + } + return Status::OK(); + } + + size_t count = 0; + bool nulls_observed = false; + SumCType sum = 0; + std::shared_ptr out_type; + ScalarAggregateOptions options; +}; + +template +struct NullImpl : public ScalarAggregator { + using ScalarType = typename TypeTraits::ScalarType; + + explicit NullImpl(const ScalarAggregateOptions& options_) : options(options_) {} + + Status Consume(KernelContext*, const ExecSpan& batch) override { + if (batch[0].is_scalar() || batch[0].array.GetNullCount() > 0) { + // If the batch is a scalar or an array with elements, set is_empty to false + is_empty = false; + } + return Status::OK(); + } + + Status MergeFrom(KernelContext*, KernelState&& src) override { + const auto& other = checked_cast(src); + this->is_empty &= other.is_empty; + return Status::OK(); + } + + Status Finalize(KernelContext*, Datum* out) override { + if ((options.skip_nulls || this->is_empty) && options.min_count == 0) { + // Return 0 if the remaining data is empty + out->value = output_empty(); + } else { + out->value = MakeNullScalar(TypeTraits::type_singleton()); + } + return Status::OK(); + } + + virtual std::shared_ptr output_empty() = 0; + + bool is_empty = true; + ScalarAggregateOptions options; +}; + +template +struct NullSumImpl : public NullImpl { + using ScalarType = 
typename TypeTraits::ScalarType; + + explicit NullSumImpl(const ScalarAggregateOptions& options_) + : NullImpl(options_) {} + + std::shared_ptr output_empty() override { + return std::make_shared(0); + } +}; + +template