Merge pull request #4316 from OpenMathLib/develop

Merge develop into release-0.3.0 for 0.3.25
OpenMathLib · Nov 12, 2023 · 5e1a429 · 5e1a429
2 parents 0e54cbd + 64c9671
commit 5e1a429
Show file tree

Hide file tree

Showing 491 changed files with 14,379 additions and 39,851 deletions.
diff --git a/.cirrus.yml b/.cirrus.yml
@@ -148,6 +148,16 @@ FreeBSD_task:
   - ls -l /usr/local/lib
   - gmake CC=gcc INTERFACE64=1
 
+FreeBSD_task:
+  name: FreeBSD-clang-openmp
+  freebsd_instance:
+    image_family: freebsd-13-2
+  install_script:
+  - pkg update -f && pkg upgrade -y && pkg install -y gmake gcc 
+  - ln -s /usr/local/lib/gcc12/libgfortran.so.5.0.0 /usr/lib/libgfortran.so
+  compile_script:
+  - gmake CC=clang FC=gfortran USE_OPENMP=1 CPP_THREAD_SAFETY_TEST=1
+
 #task:
 #  name: Windows/LLVM16     --- too slow ---
 #  windows_container:

diff --git a/.cirun.yml b/.cirun.yml
@@ -0,0 +1,16 @@
+# Self-Hosted Github Action Runners on AWS via Cirun.io
+# Reference: https://docs.cirun.io/reference/yaml
+runners:
+  - name: "aws-runner-graviton"
+    # Cloud Provider: AWS
+    cloud: "aws"
+    region: "us-east-1"
+    # Cheapest VM on AWS
+    instance_type: "c7g.large"
+    # Ubuntu-22.04, ami image
+    machine_image: "ami-0a0c8eebcdd6dcbd0"
+    preemptible: false
+    # Add this label in the "runs-on" param in .github/workflows/<workflow-name>.yml
+    # So that this runner is created for running the workflow
+    labels:
+      - "cirun-aws-runner-graviton"
diff --git a/.github/workflows/arm64_graviton.yml b/.github/workflows/arm64_graviton.yml
@@ -0,0 +1,139 @@
+name: arm64 graviton cirun
+
+on:
+  push:
+    branches:
+      - develop
+      - release-**
+  pull_request:
+    branches:
+      - develop
+      - release-**
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
+  cancel-in-progress: true
+
+permissions:
+  contents: read # to fetch code (actions/checkout)
+
+jobs:
+  build:
+    if: "github.repository == 'OpenMathLib/OpenBLAS'"
+    runs-on: "cirun-aws-runner-graviton--${{ github.run_id }}"
+
+    strategy:
+      fail-fast: false
+      matrix:
+        fortran: [gfortran]
+        build: [cmake, make]
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v3
+
+      - name: Print system information
+        run: |
+          if [ "$RUNNER_OS" == "Linux" ]; then
+            cat /proc/cpuinfo
+          else
+            echo "::error::$RUNNER_OS not supported"
+            exit 1
+          fi
+
+      - name: Install Dependencies
+        run: |
+          if [ "$RUNNER_OS" == "Linux" ]; then
+            sudo apt update
+            sudo apt-get install -y gfortran cmake ccache libtinfo5
+          else
+            echo "::error::$RUNNER_OS not supported"
+            exit 1
+          fi
+
+      - name: Compilation cache
+        uses: actions/cache@v3
+        with:
+          path: ~/.ccache
+          # We include the commit sha in the cache key, as new cache entries are
+          # only created if there is no existing entry for the key yet.
+          # GNU make and cmake call the compilers differently. It looks like
+          # that causes the cache to mismatch. Keep the ccache for both build
+          # tools separate to avoid polluting each other.
+          key: ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }}-${{ github.ref }}-${{ github.sha }}
+          # Restore a matching ccache cache entry. Prefer same branch and same Fortran compiler.
+          restore-keys: |
+            ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }}-${{ github.ref }}
+            ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }}
+            ccache-${{ runner.os }}-${{ matrix.build }}
+
+      - name: Configure ccache
+        run: |
+          if [ "${{ matrix.build }}" = "make" ]; then
+            # Add ccache to path
+            if [ "$RUNNER_OS" = "Linux" ]; then
+              echo "/usr/lib/ccache" >> $GITHUB_PATH
+            else
+              echo "::error::$RUNNER_OS not supported"
+              exit 1
+            fi
+          fi
+          # Limit the maximum size and switch on compression to avoid exceeding the total disk or cache quota (5 GB).
+          test -d ~/.ccache || mkdir -p ~/.ccache
+          echo "max_size = 300M" > ~/.ccache/ccache.conf
+          echo "compression = true" >> ~/.ccache/ccache.conf
+          ccache -s
+
+      - name: Build OpenBLAS
+        run: |
+          case "${{ matrix.build }}" in
+            "make")
+              make -j$(nproc) DYNAMIC_ARCH=1 USE_OPENMP=0 FC="ccache ${{ matrix.fortran }}"
+              ;;
+            "cmake")
+              mkdir build && cd build
+              cmake -DDYNAMIC_ARCH=1 \
+                    -DNOFORTRAN=0 \
+                    -DBUILD_WITHOUT_LAPACK=0 \
+                    -DCMAKE_VERBOSE_MAKEFILE=ON \
+                    -DCMAKE_BUILD_TYPE=Release \
+                    -DCMAKE_Fortran_COMPILER=${{ matrix.fortran }} \
+                    -DCMAKE_C_COMPILER_LAUNCHER=ccache \
+                    -DCMAKE_Fortran_COMPILER_LAUNCHER=ccache \
+                    ..
+              cmake --build .
+              ;;
+            *)
+              echo "::error::Configuration not supported"
+              exit 1
+              ;;
+          esac
+
+      - name: Show ccache status
+        continue-on-error: true
+        run: ccache -s
+
+      - name: Run tests
+        timeout-minutes: 60
+        run: |
+          case "${{ matrix.build }}" in
+            "make")
+              MAKE_FLAGS='DYNAMIC_ARCH=1 USE_OPENMP=0'
+              echo "::group::Tests in 'test' directory"
+              make -C test $MAKE_FLAGS FC="ccache ${{ matrix.fortran }}"
+              echo "::endgroup::"
+              echo "::group::Tests in 'ctest' directory"
+              make -C ctest $MAKE_FLAGS FC="ccache ${{ matrix.fortran }}"
+              echo "::endgroup::"
+              echo "::group::Tests in 'utest' directory"
+              make -C utest $MAKE_FLAGS FC="ccache ${{ matrix.fortran }}"
+              echo "::endgroup::"
+              ;;
+            "cmake")
+              cd build && ctest
+              ;;
+            *)
+              echo "::error::Configuration not supported"
+              exit 1
+              ;;
+          esac
diff --git a/.github/workflows/c910v.yml b/.github/workflows/c910v.yml
@@ -2,11 +2,16 @@ name: c910v qemu test
 
 on: [push, pull_request]
 
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
+  cancel-in-progress: true
+
 permissions:
   contents: read # to fetch code (actions/checkout)
 
 jobs:
   TEST:
+    if: "github.repository == 'OpenMathLib/OpenBLAS'"
     runs-on: ubuntu-latest
     env:
       xuetie_toolchain: https://occ-oss-prod.oss-cn-hangzhou.aliyuncs.com/resource//1663142514282

diff --git a/.github/workflows/dynamic_arch.yml b/.github/workflows/dynamic_arch.yml
@@ -2,11 +2,16 @@ name: continuous build
 
 on: [push, pull_request]
 
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
+  cancel-in-progress: true
+
 permissions:
   contents: read # to fetch code (actions/checkout)
 
 jobs:
   build:
+    if: "github.repository == 'OpenMathLib/OpenBLAS'"
     runs-on: ${{ matrix.os }}
 
     strategy:
@@ -146,18 +151,19 @@ jobs:
 
 
   msys2:
+    if: "github.repository == 'OpenMathLib/OpenBLAS'"
     runs-on: windows-latest
 
     strategy:
       fail-fast: false
       matrix:
-        msystem: [MINGW64, MINGW32, CLANG64, CLANG32]
+        msystem: [UCRT64, MINGW32, CLANG64, CLANG32]
         idx: [int32, int64]
         build-type: [Release]
         include:
-          - msystem: MINGW64
+          - msystem: UCRT64
             idx: int32
-            target-prefix: mingw-w64-x86_64
+            target-prefix: mingw-w64-ucrt-x86_64
             fc-pkg: fc
           - msystem: MINGW32
             idx: int32
@@ -175,10 +181,10 @@ jobs:
             target-prefix: mingw-w64-clang-i686
             fc-pkg: cc
             c-lapack-flags: -DC_LAPACK=ON
-          - msystem: MINGW64
+          - msystem: UCRT64
             idx: int64
             idx64-flags: -DBINARY=64 -DINTERFACE64=1
-            target-prefix: mingw-w64-x86_64
+            target-prefix: mingw-w64-ucrt-x86_64
             fc-pkg: fc
           - msystem: CLANG64
             idx: int64
@@ -188,9 +194,9 @@ jobs:
             # Compiling with Flang 16 seems to cause test errors on machines
             # with AVX512 instructions. Revisit after MSYS2 distributes Flang 17.
             no-avx512-flags: -DNO_AVX512=1
-          - msystem: MINGW64
+          - msystem: UCRT64
             idx: int32
-            target-prefix: mingw-w64-x86_64
+            target-prefix: mingw-w64-ucrt-x86_64
             fc-pkg: fc
             build-type: None
         exclude:
@@ -312,6 +318,7 @@ jobs:
 
 
   cross_build:
+    if: "github.repository == 'OpenMathLib/OpenBLAS'"
     runs-on: ubuntu-22.04
 
     strategy:

diff --git a/.github/workflows/loongarch64.yml b/.github/workflows/loongarch64.yml
@@ -2,8 +2,13 @@ name: loongarch64 qemu test
 
 on: [push, pull_request]
 
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
+  cancel-in-progress: true
+
 jobs:
   TEST:
+    if: "github.repository == 'OpenMathLib/OpenBLAS'"
     runs-on: ubuntu-latest
     strategy:
       fail-fast: false
@@ -18,6 +23,9 @@ jobs:
           - target: LOONGSON2K1000
             triple: loongarch64-unknown-linux-gnu
             opts: NO_SHARED=1 TARGET=LOONGSON2K1000
+          - target: DYNAMIC_ARCH
+            triple: loongarch64-unknown-linux-gnu
+            opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=GENERIC
 
     steps:
       - name: Checkout repository

diff --git a/.github/workflows/mips64.yml b/.github/workflows/mips64.yml
@@ -2,11 +2,16 @@ name: mips64 qemu test
 
 on: [push, pull_request]
 
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
+  cancel-in-progress: true
+
 permissions:
   contents: read # to fetch code (actions/checkout)
 
 jobs:
   TEST:
+    if: "github.repository == 'OpenMathLib/OpenBLAS'"
     runs-on: ubuntu-latest
     strategy:
       fail-fast: false

diff --git a/.github/workflows/nightly-Homebrew-build.yml b/.github/workflows/nightly-Homebrew-build.yml
@@ -18,11 +18,16 @@ on:
 
 name: Nightly-Homebrew-Build
 
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
+  cancel-in-progress: true
+
 permissions:
   contents: read # to fetch code (actions/checkout)
 
 jobs:
   build-OpenBLAS-with-Homebrew:
+    if: "github.repository == 'OpenMathLib/OpenBLAS'"
     runs-on: macos-latest
     env:
       DEVELOPER_DIR: /Applications/Xcode_11.4.1.app/Contents/Developer

diff --git a/Changelog.txt b/Changelog.txt
@@ -1,4 +1,50 @@
 OpenBLAS ChangeLog
+====================================================================
+Version 0.3.25
+ 12-Nov-2023
+
+general:
+- improved the error message shown on exceeding the maximum thread count
+- improved the code to add supplementary thread buffers in case of overflow
+- fixed a potential division by zero in ?ROTG
+- improved the ?MATCOPY functions to accept zero-sized rows or columns
+- corrected empty prototypes in function declarations
+- cleaned up unused declarations in the f2c-converted versions of the LAPACK sources
+- fixed compilation with the Cray CCE Compiler suite
+- improved link line rewriting to avoid mixed libgomp/libomp builds with clang and gfortran
+- worked around OPENMP builds with LLVM14's libomp hanging on FreeBSD
+- improved the Makefiles to require less option duplication on "make install"
+- imported the following changes from the upcoming release 3.12 of Reference-LAPACK
+  - deprecate utility functions ?GELQS and ?GEQRS (LAPACK PR 900)
+  - apply rounding up to workspace calculations done in floating point (LAPACK PR 904)
+  - avoid overflow in STGEX2/DTGEX2 (LAPACK PR 907)
+  - fix accumulation in ?LASSQ (LAPACK PR 909)
+  - fix handling of NaN values in ?GECON (LAPACK PR 926)
+  - avoid overflow in CBDSQR/ZBDSQR (LAPACK PR 927)
+  - fix poor vector orthogonalizations in ?ORBDB5/?UNBDB5 (LAPACK PR 928 & 930)
+
+x86-64:
+- fixed compile-time autodetection of AMD Ryzen3 and Ryzen4 cpus
+- fixed capability-based fallback selection for unknown cpus in DYNAMIC_ARCH
+- added AVX512 optimizations for ?ASUM on Sapphire Rapids and Cooper Lake
+
+ARM64:
+- fixed building on Apple with homebrew gcc
+- fixed building with XCODE 15
+- fixed building on A64FX and Cortex A710/X1/X2
+- increased the default buffer size for recent ARM server cpus
+
+POWER:
+- fixed building with the IBM xlf 16.1.1 compiler
+- fixed building with IBM XL C
+- added support for DYNAMIC_ARCH builds with clang
+- fixed union declaration in the BFLOAT16 test case
+- enabled optimizations for the AIX assembler on POWER10
+
+LOONGARCH64:
+- added an optimized SGEMV kernel
+- added an optimized DTRSM kernel
+
 ====================================================================
 Version 0.3.24
  03-Sep-2023