diff --git a/blas/ext/base/ssumpw/coverage.ndjson b/blas/ext/base/ssumpw/coverage.ndjson new file mode 100644 index 000000000..04979fde0 --- /dev/null +++ b/blas/ext/base/ssumpw/coverage.ndjson @@ -0,0 +1 @@ +[458,467,98.0728,34,34,100,2,4,50,458,467,98.0728,"412acd6244c7712b84bfc9d9ebd138e7f43bd659","2024-03-04 02:55:19 -0800"] diff --git a/blas/ext/base/ssumpw/index.html b/blas/ext/base/ssumpw/index.html new file mode 100644 index 000000000..42b6a1b81 --- /dev/null +++ b/blas/ext/base/ssumpw/index.html @@ -0,0 +1,206 @@ + + + + +
++ Press n or j to go to the next uncovered block, b, p or k for the previous block. +
+ +File | ++ | Statements | ++ | Branches | ++ | Functions | ++ | Lines | ++ |
---|---|---|---|---|---|---|---|---|---|
index.js | +
+
+ |
+ 100% | +69/69 | +100% | +3/3 | +100% | +0/0 | +100% | +69/69 | +
main.js | +
+
+ |
+ 100% | +35/35 | +100% | +1/1 | +100% | +0/0 | +100% | +35/35 | +
native.js | +
+
+ |
+ 100% | +35/35 | +100% | +1/1 | +100% | +0/0 | +100% | +35/35 | +
ndarray.js | +
+
+ |
+ 100% | +134/134 | +100% | +14/14 | +100% | +1/1 | +100% | +134/134 | +
ndarray.native.js | +
+
+ |
+ 89.47% | +51/57 | +100% | +1/1 | +0% | +0/1 | +89.47% | +51/57 | +
ssumpw.js | +
+
+ |
+ 100% | +85/85 | +100% | +13/13 | +100% | +1/1 | +100% | +85/85 | +
ssumpw.native.js | +
+
+ |
+ 94.23% | +49/52 | +100% | +1/1 | +0% | +0/1 | +94.23% | +49/52 | +
+ Press n or j to go to the next uncovered block, b, p or k for the previous block. +
/**
* @license Apache-2.0
*
* Copyright (c) 2024 The Stdlib Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*    http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

'use strict';

/**
* Sum the elements of a single-precision floating-point strided array using pairwise summation.
*
* @module @stdlib/blas/ext/base/ssumpw
*
* @example
* var Float32Array = require( '@stdlib/array/float32' );
* var ssumpw = require( '@stdlib/blas/ext/base/ssumpw' );
*
* var x = new Float32Array( [ 1.0, -2.0, 2.0 ] );
* var N = x.length;
*
* var v = ssumpw( N, x, 1 );
* // returns 1.0
*
* @example
* var Float32Array = require( '@stdlib/array/float32' );
* var ssumpw = require( '@stdlib/blas/ext/base/ssumpw' );
*
* var x = new Float32Array( [ 2.0, 1.0, 2.0, -2.0, -2.0, 2.0, 3.0, 4.0 ] );
*
* var v = ssumpw.ndarray( 4, x, 2, 1 );
* // returns 5.0
*/

// MODULES //

var join = require( 'path' ).join;
var tryRequire = require( '@stdlib/utils/try-require' );
var isError = require( '@stdlib/assert/is-error' );
var main = require( './main.js' );


// MAIN //

// Attempt to load the native implementation; when the attempt yields an error object, use the JavaScript implementation instead:
var native = tryRequire( join( __dirname, './native.js' ) );
var ssumpw = ( isError( native ) ) ? main : native;


// EXPORTS //

module.exports = ssumpw;

// exports: { "ndarray": "ssumpw.ndarray" }
+ Press n or j to go to the next uncovered block, b, p or k for the previous block. +
/**
* @license Apache-2.0
*
* Copyright (c) 2020 The Stdlib Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*    http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

'use strict';

// MODULES //

var setReadOnly = require( '@stdlib/utils/define-nonenumerable-read-only-property' );
var api = require( './ssumpw.js' );
var ndarrayAPI = require( './ndarray.js' );


// MAIN //

// Expose the offset-aware (JavaScript) implementation as the `ndarray` property of the main function:
setReadOnly( api, 'ndarray', ndarrayAPI );


// EXPORTS //

module.exports = api;
+ Press n or j to go to the next uncovered block, b, p or k for the previous block. +
/**
* @license Apache-2.0
*
* Copyright (c) 2020 The Stdlib Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*    http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

'use strict';

// MODULES //

var setReadOnly = require( '@stdlib/utils/define-nonenumerable-read-only-property' );
var api = require( './ssumpw.native.js' );
var ndarrayAPI = require( './ndarray.native.js' );


// MAIN //

// Expose the offset-aware (add-on backed) implementation as the `ndarray` property of the main function:
setReadOnly( api, 'ndarray', ndarrayAPI );


// EXPORTS //

module.exports = api;
+ Press n or j to go to the next uncovered block, b, p or k for the previous block. +
/**
* @license Apache-2.0
*
* Copyright (c) 2024 The Stdlib Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*    http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

'use strict';

// MODULES //

var float64ToFloat32 = require( '@stdlib/number/float64/base/to-float32' );
var floor = require( '@stdlib/math/base/special/floor' );


// VARIABLES //

// Blocksize for pairwise summation (NOTE: decreasing the blocksize decreases rounding error as more pairs are summed, but also decreases performance. Because the inner loop is unrolled eight times, the blocksize is effectively `16`.):
var BLOCKSIZE = 128;


// MAIN //

/**
* Computes the sum of single-precision floating-point strided array elements using pairwise summation.
*
* ## Method
*
* -   This implementation uses pairwise summation, which accrues rounding error `O(log2 N)` instead of `O(N)`. The recursion depth is also `O(log2 N)`.
*
* ## Notes
*
* -   If `N <= 0`, the function returns `0.0`.
* -   If `stride` is `0`, every indexed element is `x[ offset ]`, and the function returns `N * x[ offset ]` rounded to single precision.
* -   Fewer than `8` elements are summed sequentially; up to `BLOCKSIZE` elements are summed with eight interleaved accumulators; larger inputs are split in two and summed recursively.
*
* ## References
*
* -   Higham, Nicholas J. 1993. "The Accuracy of Floating Point Summation." _SIAM Journal on Scientific Computing_ 14 (4): 783–99. doi:[10.1137/0914050](https://doi.org/10.1137/0914050).
*
* @param {PositiveInteger} N - number of indexed elements
* @param {Float32Array} x - input array
* @param {integer} stride - stride length
* @param {NonNegativeInteger} offset - starting index
* @returns {number} sum
*
* @example
* var Float32Array = require( '@stdlib/array/float32' );
*
* var x = new Float32Array( [ 2.0, 1.0, 2.0, -2.0, -2.0, 2.0, 3.0, 4.0 ] );
*
* var v = ssumpw( 4, x, 2, 1 );
* // returns 5.0
*
* @example
* var Float32Array = require( '@stdlib/array/float32' );
*
* var x = new Float32Array( [ 2.0, 1.0 ] );
*
* var v = ssumpw( 3, x, 0, 1 );
* // returns 3.0
*/
function ssumpw( N, x, stride, offset ) {
	var ix;
	var s0;
	var s1;
	var s2;
	var s3;
	var s4;
	var s5;
	var s6;
	var s7;
	var M;
	var s;
	var n;
	var i;

	if ( N <= 0 ) {
		return 0.0;
	}
	if ( stride === 0 ) {
		// All `N` indexed elements are the same array element, so the sum is that element counted `N` times (returning only the element itself would drop `N-1` terms):
		return float64ToFloat32( N * x[ offset ] );
	}
	if ( N === 1 ) {
		return x[ offset ];
	}
	ix = offset;
	if ( N < 8 ) {
		// Use simple summation...
		s = 0.0;
		for ( i = 0; i < N; i++ ) {
			s = float64ToFloat32( s + x[ ix ] );
			ix += stride;
		}
		return s;
	}
	if ( N <= BLOCKSIZE ) {
		// Sum a block with 8 accumulators (by loop unrolling, we lower the effective blocksize to 16)...
		s0 = x[ ix ];
		s1 = x[ ix+stride ];
		s2 = x[ ix+(2*stride) ];
		s3 = x[ ix+(3*stride) ];
		s4 = x[ ix+(4*stride) ];
		s5 = x[ ix+(5*stride) ];
		s6 = x[ ix+(6*stride) ];
		s7 = x[ ix+(7*stride) ];
		ix += 8 * stride;

		// Number of trailing elements which do not fill a complete group of eight:
		M = N % 8;
		for ( i = 8; i < N-M; i += 8 ) {
			s0 = float64ToFloat32( s0 + x[ ix ] );
			s1 = float64ToFloat32( s1 + x[ ix+stride ] );
			s2 = float64ToFloat32( s2 + x[ ix+(2*stride) ] );
			s3 = float64ToFloat32( s3 + x[ ix+(3*stride) ] );
			s4 = float64ToFloat32( s4 + x[ ix+(4*stride) ] );
			s5 = float64ToFloat32( s5 + x[ ix+(5*stride) ] );
			s6 = float64ToFloat32( s6 + x[ ix+(6*stride) ] );
			s7 = float64ToFloat32( s7 + x[ ix+(7*stride) ] );
			ix += 8 * stride;
		}
		// Pairwise sum the accumulators:
		s = float64ToFloat32( float64ToFloat32( float64ToFloat32(s0+s1) + float64ToFloat32(s2+s3) ) + float64ToFloat32( float64ToFloat32(s4+s5) + float64ToFloat32(s6+s7) ) ); // eslint-disable-line max-len

		// Clean-up loop (`i` continues from where the unrolled loop stopped)...
		for ( i; i < N; i++ ) {
			s = float64ToFloat32( s + x[ ix ] );
			ix += stride;
		}
		return s;
	}
	// Recurse by dividing by two, but avoiding non-multiples of unroll factor...
	n = floor( N/2 );
	n -= n % 8;
	return float64ToFloat32( ssumpw( n, x, stride, ix ) + ssumpw( N-n, x, stride, ix+(n*stride) ) ); // eslint-disable-line max-len
}


// EXPORTS //

module.exports = ssumpw;
+ Press n or j to go to the next uncovered block, b, p or k for the previous block. +
/**
* @license Apache-2.0
*
* Copyright (c) 2024 The Stdlib Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*    http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

'use strict';

// MODULES //

var minViewBufferIndex = require( '@stdlib/strided/base/min-view-buffer-index' );
var offsetView = require( '@stdlib/strided/base/offset-view' );
var addon = require( './ssumpw.native.js' );


// MAIN //

/**
* Sums the elements of a single-precision floating-point strided array using pairwise summation.
*
* ## Notes
*
* -   The function resolves a buffer index from `N`, `stride`, and `offset` using `minViewBufferIndex`, creates a view of `x` beginning at that index using `offsetView`, and forwards the view to the offset-free implementation in `./ssumpw.native.js`.
*
* @param {PositiveInteger} N - number of indexed elements
* @param {Float32Array} x - input array
* @param {integer} stride - stride length
* @param {NonNegativeInteger} offset - starting index
* @returns {number} sum
*
* @example
* var Float32Array = require( '@stdlib/array/float32' );
*
* var x = new Float32Array( [ 2.0, 1.0, 2.0, -2.0, -2.0, 2.0, 3.0, 4.0 ] );
*
* var v = ssumpw( 4, x, 2, 1 );
* // returns 5.0
*/
function ssumpw( N, x, stride, offset ) {
	var start = minViewBufferIndex( N, stride, offset );
	return addon( N, offsetView( x, start ), stride );
}


// EXPORTS //

module.exports = ssumpw;
+ Press n or j to go to the next uncovered block, b, p or k for the previous block. +
/**
* @license Apache-2.0
*
* Copyright (c) 2020 The Stdlib Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*    http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

'use strict';

// MODULES //

var float64ToFloat32 = require( '@stdlib/number/float64/base/to-float32' );
var sum = require( './ndarray.js' );


// MAIN //

/**
* Computes the sum of single-precision floating-point strided array elements using pairwise summation.
*
* ## Method
*
* -   This implementation uses pairwise summation, which accrues rounding error `O(log2 N)` instead of `O(N)`. The recursion depth is also `O(log2 N)`.
*
* ## Notes
*
* -   If `N <= 0`, the function returns `0.0`.
* -   If `stride` is `0`, every indexed element is `x[ 0 ]`, and the function returns `N * x[ 0 ]` rounded to single precision.
* -   For a negative `stride`, iteration starts at index `(1-N) * stride` so that indexing proceeds toward index `0`.
* -   All summation is delegated to the offset-aware implementation in `./ndarray.js`.
*
* ## References
*
* -   Higham, Nicholas J. 1993. "The Accuracy of Floating Point Summation." _SIAM Journal on Scientific Computing_ 14 (4): 783–99. doi:[10.1137/0914050](https://doi.org/10.1137/0914050).
*
* @param {PositiveInteger} N - number of indexed elements
* @param {Float32Array} x - input array
* @param {integer} stride - stride length
* @returns {number} sum
*
* @example
* var Float32Array = require( '@stdlib/array/float32' );
*
* var x = new Float32Array( [ 1.0, -2.0, 2.0 ] );
* var N = x.length;
*
* var v = ssumpw( N, x, 1 );
* // returns 1.0
*
* @example
* var Float32Array = require( '@stdlib/array/float32' );
*
* var x = new Float32Array( [ 2.0, 1.0 ] );
*
* var v = ssumpw( 3, x, 0 );
* // returns 6.0
*/
function ssumpw( N, x, stride ) {
	var ix;

	if ( N <= 0 ) {
		return 0.0;
	}
	if ( stride === 0 ) {
		// All `N` indexed elements are the first array element, so the sum is that element counted `N` times (handled here so the result does not depend on how the delegate treats a zero stride):
		return float64ToFloat32( N * x[ 0 ] );
	}
	if ( stride < 0 ) {
		ix = (1-N) * stride;
	} else {
		ix = 0;
	}
	// The offset-aware implementation already handles the single-element and short-array (`N < 8`) cases, so there is no need to repeat them here:
	return sum( N, x, stride, ix );
}


// EXPORTS //

module.exports = ssumpw;
+ Press n or j to go to the next uncovered block, b, p or k for the previous block. +
/**
* @license Apache-2.0
*
* Copyright (c) 2020 The Stdlib Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*    http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

'use strict';

// MODULES //

var addon = require( './../src/addon.node' );


// MAIN //

/**
* Sums the elements of a single-precision floating-point strided array using pairwise summation.
*
* ## Notes
*
* -   The arguments are forwarded unchanged to the compiled add-on (`./../src/addon.node`), and the add-on's return value is returned as is.
*
* @param {PositiveInteger} N - number of indexed elements
* @param {Float32Array} x - input array
* @param {integer} stride - stride length
* @returns {number} sum
*
* @example
* var Float32Array = require( '@stdlib/array/float32' );
*
* var x = new Float32Array( [ 1.0, -2.0, 2.0 ] );
* var N = x.length;
*
* var v = ssumpw( N, x, 1 );
* // returns 1.0
*/
function ssumpw( N, x, stride ) {
	var total = addon( N, x, stride );
	return total;
}


// EXPORTS //

module.exports = ssumpw;