diff --git a/simd/simd_amd64.go b/simd/simd_amd64.go new file mode 100644 index 0000000..251db4a --- /dev/null +++ b/simd/simd_amd64.go @@ -0,0 +1,36 @@ +/* This file is a partial copy of https://github.com/viterin/vek/blob/v0.4.2/internal/functions/accel_amd64.go. +Here is its license, which only applies to the copied parts and not to the rest of chromem-go, +which is licensed under the GNU Affero General Public License. + +MIT License + +Copyright (c) 2022 viterin + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +*/ + +package simd + +import ( + "runtime" + + "golang.org/x/sys/cpu" // TODO: Can we get rid of this dependency? 
+) + +var UseAVX2 bool = cpu.X86.HasAVX2 && cpu.X86.HasFMA && runtime.GOOS != "darwin" /* UseAVX2 is true when the CPU reports both AVX2 and FMA and the OS is not darwin; dotProduct in vector.go reads it to choose between the assembly routine and its Go loop. NOTE(review): the reason for excluding darwin is not stated in this patch; confirm. */ diff --git a/simd/simd_avx2_amd64.go b/simd/simd_avx2_amd64.go new file mode 100644 index 0000000..3eccde8 --- /dev/null +++ b/simd/simd_avx2_amd64.go @@ -0,0 +1,31 @@ +/* This file is a partial copy of https://github.com/viterin/vek/blob/v0.4.2/internal/functions/accel_avx2_amd64.go. +Here is its license, which only applies to the copied parts and not to the rest of chromem-go, +which is licensed under the GNU Affero General Public License. + +MIT License + +Copyright (c) 2022 viterin + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +*/ + +package simd + +//go:noescape +func Dot_AVX2_F32(x []float32, y []float32) float32 /* Body-less declaration; the implementation is TEXT ·Dot_AVX2_F32 in simd_avx2_amd64.s. That routine sizes its loops from len(x) only and never reads len(y), so y must hold at least len(x) elements. It executes AVX and FMA instructions; the visible caller in vector.go guards the call with UseAVX2. */ diff --git a/simd/simd_avx2_amd64.s b/simd/simd_avx2_amd64.s new file mode 100644 index 0000000..68078ba --- /dev/null +++ b/simd/simd_avx2_amd64.s @@ -0,0 +1,92 @@ +/* This file is a partial copy of https://github.com/viterin/vek/blob/v0.4.2/internal/functions/accel_avx2_amd64.s. 
+Here is its license, which only applies to the copied parts and not to the rest of chromem-go, +which is licensed under the GNU Affero General Public License. + +MIT License + +Copyright (c) 2022 viterin + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
+*/ + +#include "textflag.h" + +// func Dot_AVX2_F32(x []float32, y []float32) float32 +// Requires: AVX, FMA3, SSE +TEXT ·Dot_AVX2_F32(SB), NOSPLIT, $0-52 /* Returns the sum of x[i]*y[i] for i in [0, len(x)). No local frame ($0); 52 argument bytes: two 24-byte slice headers, then the float32 result at offset 48. The vector path keeps 32 partial sums and uses fused multiply-add, so its rounding can differ from a plain left-to-right Go loop. */ + MOVQ x_base+0(FP), DI /* DI = address of x's first element */ + MOVQ y_base+24(FP), SI /* SI = address of y's first element */ + MOVQ x_len+8(FP), DX /* DX = len(x); len(y) is never loaded, so y must be at least as long as x */ + TESTQ DX, DX + JE LBB1_1 /* empty x: return 0 */ + CMPQ DX, $0x20 + JAE LBB1_4 /* 32 or more elements: vector path */ + VXORPS X0, X0, X0 /* fewer than 32 elements: X0 = 0 is the scalar accumulator */ + XORL AX, AX /* AX = 0, element index for the scalar loop */ + JMP LBB1_7 + +LBB1_1: + VXORPS X0, X0, X0 + MOVSS X0, ret+48(FP) /* store 0 as the result */ + RET + +LBB1_4: + MOVQ DX, AX + ANDQ $-32, AX /* AX = len(x) rounded down to a multiple of 32 */ + VXORPS X0, X0, X0 /* zero accumulator 0 of 4 (VEX-encoded 128-bit ops also clear the upper YMM half) */ + XORL CX, CX /* CX = 0, element index for the vector loop */ + VXORPS X1, X1, X1 + VXORPS X2, X2, X2 + VXORPS X3, X3, X3 + +LBB1_5: /* each iteration consumes 32 float32 values from x and from y */ + VMOVUPS (SI)(CX*4), Y4 /* Y4..Y7 = four unaligned loads of 8 floats each from y */ + VMOVUPS 32(SI)(CX*4), Y5 + VMOVUPS 64(SI)(CX*4), Y6 + VMOVUPS 96(SI)(CX*4), Y7 + VFMADD231PS (DI)(CX*4), Y4, Y0 /* Y0 = Y0 plus (8 floats of x) times Y4, fused; likewise for Y1..Y3 below */ + VFMADD231PS 32(DI)(CX*4), Y5, Y1 + VFMADD231PS 64(DI)(CX*4), Y6, Y2 + VFMADD231PS 96(DI)(CX*4), Y7, Y3 + ADDQ $0x20, CX /* advance by 32 elements */ + CMPQ AX, CX + JNE LBB1_5 /* repeat until CX reaches the rounded-down length */ + VADDPS Y0, Y1, Y0 /* fold the four accumulators into Y0 */ + VADDPS Y0, Y2, Y0 + VADDPS Y0, Y3, Y0 + VEXTRACTF128 $0x01, Y0, X1 /* X1 = upper 128 bits of Y0 */ + VADDPS X1, X0, X0 /* 8 lanes reduced to 4 */ + VPERMILPD $0x01, X0, X1 /* X1 = X0 with its 64-bit halves swapped */ + VADDPS X1, X0, X0 /* 4 lanes reduced to 2 */ + VMOVSHDUP X0, X1 /* X1 = odd-indexed lanes of X0 duplicated into the even lanes */ + VADDSS X1, X0, X0 /* lane 0 of X0 now holds the vector-path sum */ + CMPQ AX, DX + JE LBB1_8 /* length was a multiple of 32: no tail to process */ + +LBB1_7: /* scalar loop: handles short inputs and the tail after the vector loop */ + VMOVSS (SI)(AX*4), X1 /* X1 = y[AX] */ + VFMADD231SS (DI)(AX*4), X1, X0 /* X0 = X0 plus x[AX] times y[AX], fused */ + ADDQ $0x01, AX + CMPQ DX, AX + JNE LBB1_7 + +LBB1_8: + VZEROUPPER /* clears the upper halves of all YMM registers; conventionally done before returning to code that may use legacy SSE */ + MOVSS X0, ret+48(FP) /* store the result */ + RET diff --git a/simd/simd_noasm.go b/simd/simd_noasm.go new file mode 100644 index 0000000..fbc9b13 --- /dev/null +++ b/simd/simd_noasm.go @@ -0,0 +1,36 @@ +//go:build !amd64 + +/* This file is a partial copy of https://github.com/viterin/vek/blob/v0.4.2/internal/functions/accel_noasm.go. +Here is its license, which only applies to the copied parts and not to the rest of chromem-go, +which is licensed under the GNU Affero General Public License. 
+ +MIT License + +Copyright (c) 2022 viterin + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +*/ + +package simd + +var UseAVX2 bool = false /* UseAVX2 is always false in this file, which is built only when the target is not amd64 (see its go:build line), so dotProduct in vector.go takes its pure-Go loop. */ + +func Dot_AVX2_F32(x []float32, y []float32) float32 { + panic("not implemented") /* Stub that keeps the package compiling on non-amd64 targets; it panics if called. The visible caller in vector.go checks UseAVX2 first, so it is not reached from there. */ +} diff --git a/vector.go b/vector.go index 972b6b2..aec4a8f 100644 --- a/vector.go +++ b/vector.go @@ -4,6 +4,8 @@ import ( "errors" "fmt" "math" + + "github.com/philippgille/chromem-go/simd" ) const isNormalizedPrecisionTolerance = 1e-6 @@ -42,8 +44,12 @@ func dotProduct(a, b []float32) (float32, error) { } var dotProduct float32 - for i := range a { - dotProduct += a[i] * b[i] + if simd.UseAVX2 { + dotProduct = simd.Dot_AVX2_F32(a, b) /* NOTE(review): the assembly sizes its loops from a alone and never reads len(b); this presumably relies on a length check above this hunk — confirm. */ + } else { + for i := range a { + dotProduct += a[i] * b[i] + } } return dotProduct, nil