diff --git a/modules/booklist/booklist.go b/modules/booklist/booklist.go index c4c49dc4..2b83560f 100644 --- a/modules/booklist/booklist.go +++ b/modules/booklist/booklist.go @@ -2,18 +2,20 @@ package booklist import ( "fmt" + "image" "image/jpeg" "log" + "math" "os" "path/filepath" "runtime/debug" "sort" "strings" + "github.com/bamiaux/rez" "github.com/geek1011/BookBrowser/formats" "github.com/geek1011/BookBrowser/models" zglob "github.com/mattn/go-zglob" - "github.com/nfnt/resize" ) // BookList represents a list of Books @@ -92,8 +94,43 @@ func NewBookListFromDir(dir, coverOutDir string, verbose, nocovers bool) (*BookL continue } - // Better quality: thumb := resize.Resize(200, 0, img, resize.Lanczos2) - thumb := resize.Resize(200, 0, cover, resize.Bicubic) + coverBounds := cover.Bounds() + coverWidth := coverBounds.Dx() + coverHeight := coverBounds.Dy() + + if coverWidth <= 200 { + continue + } + + // Scale to fit in 200x900 + scale := math.Min(float64(200.0/float64(coverWidth)), float64(900.0/float64(coverHeight))) + + // Scale and round down + coverWidth = int(float64(coverWidth) * scale) + coverHeight = int(float64(coverHeight) * scale) + + r := image.Rect(0, 0, coverWidth, coverHeight) + var thumb image.Image + switch t := cover.(type) { + case *image.YCbCr: + thumb = image.NewYCbCr(r, t.SubsampleRatio) + case *image.RGBA: + thumb = image.NewRGBA(r) + case *image.NRGBA: + thumb = image.NewNRGBA(r) + case *image.Gray: + thumb = image.NewGray(r) + default: + continue + } + + // rez.NewLanczosFilter(2) is better quality, but slower + err = rez.Convert(thumb, cover, rez.NewBicubicFilter()) + if err != nil { + fmt.Println(coverWidth, coverHeight, scale, err) + continue + } + thumbFile, err := os.Create(thumbPath) if err != nil { continue diff --git a/vendor/github.com/bamiaux/rez/LICENSE b/vendor/github.com/bamiaux/rez/LICENSE new file mode 100644 index 00000000..f346f200 --- /dev/null +++ b/vendor/github.com/bamiaux/rez/LICENSE @@ -0,0 +1,20 @@ +The MIT License (MIT) + 
+Copyright (c) 2014 Benoît Amiaux + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/vendor/github.com/bamiaux/rez/README.md b/vendor/github.com/bamiaux/rez/README.md new file mode 100644 index 00000000..534cf1d3 --- /dev/null +++ b/vendor/github.com/bamiaux/rez/README.md @@ -0,0 +1,49 @@ +# rez [![GoDoc](https://godoc.org/github.com/bamiaux/rez/web?status.png)](https://godoc.org/github.com/bamiaux/rez) [![Build Status](https://travis-ci.org/bamiaux/rez.png)](https://travis-ci.org/bamiaux/rez) +Package rez provides image resizing in pure Go and SIMD. + +Download: +```shell +go get github.com/bamiaux/rez +``` + + +Full documentation at http://godoc.org/github.com/bamiaux/rez + +* * * +Package rez provides image resizing in pure Go and SIMD. 
+ +Featuring: + +``` +- YCbCr, RGBA, NRGBA & Gray resizes +- YCbCr Chroma subsample ratio conversions +- Optional interlaced-aware resizes +- Parallel resizes +- SIMD optimisations on AMD64 +``` + +The easiest way to use it is: + +``` +err := Convert(output, input, NewBicubicFilter()) +``` + +However, if you plan to convert video, where resize parameters are the same for +multiple images, the best way is: + +``` +cfg, err := PrepareConversion(output, input) +converter, err := NewConverter(cfg, NewBicubicFilter()) +for i := 0; i < N; i++ { + err := converter.Convert(output[i], input[i]) +} +``` + +Note that by default, images are resized in parallel with GOMAXPROCS slices. +Best performance is obtained when GOMAXPROCS is at least equal to your CPU +count. + + + +* * * +Automatically generated by [autoreadme](https://github.com/jimmyfrasche/autoreadme) on 2014.11.25 diff --git a/vendor/github.com/bamiaux/rez/README.md.template b/vendor/github.com/bamiaux/rez/README.md.template new file mode 100644 index 00000000..1821e658 --- /dev/null +++ b/vendor/github.com/bamiaux/rez/README.md.template @@ -0,0 +1,19 @@ +#{{.Name}} [![GoDoc](https://godoc.org/{{.Import}}/web?status.png)](https://godoc.org/{{.Import}}) [![Build Status](https://travis-ci.org/bamiaux/rez.png)](https://travis-ci.org/bamiaux/rez) +{{.Synopsis}} + +Download: +```shell +go get {{.Import}} +``` +{{if .Library}} + +Full documentation at http://godoc.org/{{.Import}} +{{end}} +* * * +{{.Doc}} +{{if .Bugs}} +#Bugs +{{range .Bugs}}* {{.}}{{end}} +{{end}} +* * * +Automatically generated by [autoreadme](https://github.com/jimmyfrasche/autoreadme) on {{.Today}} diff --git a/vendor/github.com/bamiaux/rez/filters.go b/vendor/github.com/bamiaux/rez/filters.go new file mode 100644 index 00000000..fa724600 --- /dev/null +++ b/vendor/github.com/bamiaux/rez/filters.go @@ -0,0 +1,103 @@ +// Copyright 2013 Benoît Amiaux. All rights reserved. 
+// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package rez + +import ( + "math" +) + +// Filter is an interpolation filter interface +// It is used to compute weights for every input pixel +type Filter interface { + Taps() int + Name() string + Get(dx float64) float64 +} + +type bilinear struct{} + +func (bilinear) Taps() int { return 1 } +func (bilinear) Name() string { return "bilinear" } + +func (bilinear) Get(x float64) float64 { + if x < 1 { + return 1 - x + } + return 0 +} + +// NewBilinearFilter exports a bilinear filter +func NewBilinearFilter() Filter { + return bilinear{} +} + +type bicubic struct { + a, b, c, d, e, f, g float64 +} + +func (bicubic) Taps() int { + return 2 +} + +func (bicubic) Name() string { + return "bicubic" +} + +func (f *bicubic) Get(x float64) float64 { + if x < 1 { + return f.a + x*x*(f.b+x*f.c) + } else if x < 2 { + return f.d + x*(f.e+x*(f.f+x*f.g)) + } + return 0 +} + +// NewCustomBicubicFilter exports a bicubic filter where b and c can be +// customized. 
+// For example, the Mitchell-Netravali bicubic filter is b = c = 1/3 +func NewCustomBicubicFilter(b, c float64) Filter { + f := &bicubic{} + f.a = 1 - b/3 + f.b = -3 + 2*b + c + f.c = 2 - 3*b/2 - c + f.d = 4*b/3 + 4*c + f.e = -2*b - 8*c + f.f = b + 5*c + f.g = -b/6 - c + return f +} + +// NewBicubicFilter exports a classic bicubic filter +func NewBicubicFilter() Filter { + return NewCustomBicubicFilter(0, 0.5) +} + +type lanczos struct { + alpha float64 +} + +func (f lanczos) Taps() int { + return int(f.alpha) +} + +func (lanczos) Name() string { + return "lanczos" +} + +func (f lanczos) Get(x float64) float64 { + if x > f.alpha { + return 0 + } else if x == 0 { + return 1 + } + b := x * math.Pi + c := b / f.alpha + return math.Sin(b) * math.Sin(c) / (b * c) +} + +// NewLanczosFilter exports a lanczos filter where alpha is filter size +func NewLanczosFilter(alpha int) Filter { + return lanczos{alpha: float64(alpha)} +} diff --git a/vendor/github.com/bamiaux/rez/fixedscalers.go b/vendor/github.com/bamiaux/rez/fixedscalers.go new file mode 100644 index 00000000..840e97da --- /dev/null +++ b/vendor/github.com/bamiaux/rez/fixedscalers.go @@ -0,0 +1,277 @@ +// Copyright 2013 Benoît Amiaux. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. 
+ +package rez + +// This file is auto-generated - do not modify + +func h8scale2Go(dst, src []byte, cof, off []int16, + taps, width, height, dp, sp int) { + di := 0 + si := 0 + for y := 0; y < height; y++ { + c := cof + s := src[si:] + d := dst[di:] + for x, xoff := range off[:width] { + pix := int(s[xoff+0])*int(c[0]) + + int(s[xoff+1])*int(c[1]) + d[x] = u8((pix + 1<<(Bits-1)) >> Bits) + c = c[2:] + } + di += dp + si += sp + } +} + +func v8scale2Go(dst, src []byte, cof, off []int16, + taps, width, height, dp, sp int) { + di := 0 + for _, yoff := range off[:height] { + src = src[sp*int(yoff):] + d := dst[di:] + for x := range d[:width] { + pix := int(src[sp*0+x])*int(cof[0]) + + int(src[sp*1+x])*int(cof[1]) + d[x] = u8((pix + 1<<(Bits-1)) >> Bits) + } + cof = cof[2:] + di += dp + } +} + +func h8scale4Go(dst, src []byte, cof, off []int16, + taps, width, height, dp, sp int) { + di := 0 + si := 0 + for y := 0; y < height; y++ { + c := cof + s := src[si:] + d := dst[di:] + for x, xoff := range off[:width] { + pix := int(s[xoff+0])*int(c[0]) + + int(s[xoff+1])*int(c[1]) + + int(s[xoff+2])*int(c[2]) + + int(s[xoff+3])*int(c[3]) + d[x] = u8((pix + 1<<(Bits-1)) >> Bits) + c = c[4:] + } + di += dp + si += sp + } +} + +func v8scale4Go(dst, src []byte, cof, off []int16, + taps, width, height, dp, sp int) { + di := 0 + for _, yoff := range off[:height] { + src = src[sp*int(yoff):] + d := dst[di:] + for x := range d[:width] { + pix := int(src[sp*0+x])*int(cof[0]) + + int(src[sp*1+x])*int(cof[1]) + + int(src[sp*2+x])*int(cof[2]) + + int(src[sp*3+x])*int(cof[3]) + d[x] = u8((pix + 1<<(Bits-1)) >> Bits) + } + cof = cof[4:] + di += dp + } +} + +func h8scale6Go(dst, src []byte, cof, off []int16, + taps, width, height, dp, sp int) { + di := 0 + si := 0 + for y := 0; y < height; y++ { + c := cof + s := src[si:] + d := dst[di:] + for x, xoff := range off[:width] { + pix := int(s[xoff+0])*int(c[0]) + + int(s[xoff+1])*int(c[1]) + + int(s[xoff+2])*int(c[2]) + + int(s[xoff+3])*int(c[3]) 
+ + int(s[xoff+4])*int(c[4]) + + int(s[xoff+5])*int(c[5]) + d[x] = u8((pix + 1<<(Bits-1)) >> Bits) + c = c[6:] + } + di += dp + si += sp + } +} + +func v8scale6Go(dst, src []byte, cof, off []int16, + taps, width, height, dp, sp int) { + di := 0 + for _, yoff := range off[:height] { + src = src[sp*int(yoff):] + d := dst[di:] + for x := range d[:width] { + pix := int(src[sp*0+x])*int(cof[0]) + + int(src[sp*1+x])*int(cof[1]) + + int(src[sp*2+x])*int(cof[2]) + + int(src[sp*3+x])*int(cof[3]) + + int(src[sp*4+x])*int(cof[4]) + + int(src[sp*5+x])*int(cof[5]) + d[x] = u8((pix + 1<<(Bits-1)) >> Bits) + } + cof = cof[6:] + di += dp + } +} + +func h8scale8Go(dst, src []byte, cof, off []int16, + taps, width, height, dp, sp int) { + di := 0 + si := 0 + for y := 0; y < height; y++ { + c := cof + s := src[si:] + d := dst[di:] + for x, xoff := range off[:width] { + pix := int(s[xoff+0])*int(c[0]) + + int(s[xoff+1])*int(c[1]) + + int(s[xoff+2])*int(c[2]) + + int(s[xoff+3])*int(c[3]) + + int(s[xoff+4])*int(c[4]) + + int(s[xoff+5])*int(c[5]) + + int(s[xoff+6])*int(c[6]) + + int(s[xoff+7])*int(c[7]) + d[x] = u8((pix + 1<<(Bits-1)) >> Bits) + c = c[8:] + } + di += dp + si += sp + } +} + +func v8scale8Go(dst, src []byte, cof, off []int16, + taps, width, height, dp, sp int) { + di := 0 + for _, yoff := range off[:height] { + src = src[sp*int(yoff):] + d := dst[di:] + for x := range d[:width] { + pix := int(src[sp*0+x])*int(cof[0]) + + int(src[sp*1+x])*int(cof[1]) + + int(src[sp*2+x])*int(cof[2]) + + int(src[sp*3+x])*int(cof[3]) + + int(src[sp*4+x])*int(cof[4]) + + int(src[sp*5+x])*int(cof[5]) + + int(src[sp*6+x])*int(cof[6]) + + int(src[sp*7+x])*int(cof[7]) + d[x] = u8((pix + 1<<(Bits-1)) >> Bits) + } + cof = cof[8:] + di += dp + } +} + +func h8scale10Go(dst, src []byte, cof, off []int16, + taps, width, height, dp, sp int) { + di := 0 + si := 0 + for y := 0; y < height; y++ { + c := cof + s := src[si:] + d := dst[di:] + for x, xoff := range off[:width] { + pix := int(s[xoff+0])*int(c[0]) 
+ + int(s[xoff+1])*int(c[1]) + + int(s[xoff+2])*int(c[2]) + + int(s[xoff+3])*int(c[3]) + + int(s[xoff+4])*int(c[4]) + + int(s[xoff+5])*int(c[5]) + + int(s[xoff+6])*int(c[6]) + + int(s[xoff+7])*int(c[7]) + + int(s[xoff+8])*int(c[8]) + + int(s[xoff+9])*int(c[9]) + d[x] = u8((pix + 1<<(Bits-1)) >> Bits) + c = c[10:] + } + di += dp + si += sp + } +} + +func v8scale10Go(dst, src []byte, cof, off []int16, + taps, width, height, dp, sp int) { + di := 0 + for _, yoff := range off[:height] { + src = src[sp*int(yoff):] + d := dst[di:] + for x := range d[:width] { + pix := int(src[sp*0+x])*int(cof[0]) + + int(src[sp*1+x])*int(cof[1]) + + int(src[sp*2+x])*int(cof[2]) + + int(src[sp*3+x])*int(cof[3]) + + int(src[sp*4+x])*int(cof[4]) + + int(src[sp*5+x])*int(cof[5]) + + int(src[sp*6+x])*int(cof[6]) + + int(src[sp*7+x])*int(cof[7]) + + int(src[sp*8+x])*int(cof[8]) + + int(src[sp*9+x])*int(cof[9]) + d[x] = u8((pix + 1<<(Bits-1)) >> Bits) + } + cof = cof[10:] + di += dp + } +} + +func h8scale12Go(dst, src []byte, cof, off []int16, + taps, width, height, dp, sp int) { + di := 0 + si := 0 + for y := 0; y < height; y++ { + c := cof + s := src[si:] + d := dst[di:] + for x, xoff := range off[:width] { + pix := int(s[xoff+0])*int(c[0]) + + int(s[xoff+1])*int(c[1]) + + int(s[xoff+2])*int(c[2]) + + int(s[xoff+3])*int(c[3]) + + int(s[xoff+4])*int(c[4]) + + int(s[xoff+5])*int(c[5]) + + int(s[xoff+6])*int(c[6]) + + int(s[xoff+7])*int(c[7]) + + int(s[xoff+8])*int(c[8]) + + int(s[xoff+9])*int(c[9]) + + int(s[xoff+10])*int(c[10]) + + int(s[xoff+11])*int(c[11]) + d[x] = u8((pix + 1<<(Bits-1)) >> Bits) + c = c[12:] + } + di += dp + si += sp + } +} + +func v8scale12Go(dst, src []byte, cof, off []int16, + taps, width, height, dp, sp int) { + di := 0 + for _, yoff := range off[:height] { + src = src[sp*int(yoff):] + d := dst[di:] + for x := range d[:width] { + pix := int(src[sp*0+x])*int(cof[0]) + + int(src[sp*1+x])*int(cof[1]) + + int(src[sp*2+x])*int(cof[2]) + + int(src[sp*3+x])*int(cof[3]) + + 
int(src[sp*4+x])*int(cof[4]) + + int(src[sp*5+x])*int(cof[5]) + + int(src[sp*6+x])*int(cof[6]) + + int(src[sp*7+x])*int(cof[7]) + + int(src[sp*8+x])*int(cof[8]) + + int(src[sp*9+x])*int(cof[9]) + + int(src[sp*10+x])*int(cof[10]) + + int(src[sp*11+x])*int(cof[11]) + d[x] = u8((pix + 1<<(Bits-1)) >> Bits) + } + cof = cof[12:] + di += dp + } +} diff --git a/vendor/github.com/bamiaux/rez/fixedscalers.go.input b/vendor/github.com/bamiaux/rez/fixedscalers.go.input new file mode 100644 index 00000000..e88ca533 --- /dev/null +++ b/vendor/github.com/bamiaux/rez/fixedscalers.go.input @@ -0,0 +1,10 @@ +{ + "taps": [ + [0,0], + [0,0,0,0], + [0,0,0,0,0,0], + [0,0,0,0,0,0,0,0], + [0,0,0,0,0,0,0,0,0,0], + [0,0,0,0,0,0,0,0,0,0,0,0] + ] +} diff --git a/vendor/github.com/bamiaux/rez/fixedscalers.go.template b/vendor/github.com/bamiaux/rez/fixedscalers.go.template new file mode 100644 index 00000000..4dbcd42f --- /dev/null +++ b/vendor/github.com/bamiaux/rez/fixedscalers.go.template @@ -0,0 +1,45 @@ +// Copyright 2013 Benoît Amiaux. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. 
+ +package rez + +// This file is auto-generated - do not modify + +{{range $_, $tab := .taps}} +{{$n := len $tab}} +func h8scale{{$n}}Go(dst, src []byte, cof, off []int16, + taps, width, height, dp, sp int) { + di := 0 + si := 0 + for y := 0; y < height; y++ { + c := cof + s := src[si:] + d := dst[di:] + for x, xoff := range off[:width] { + pix :={{range $i, $_ := $tab}}{{if gt $i 0}} + + {{end}}int(s[xoff+{{$i}}]) * int(c[{{$i}}]){{end}} + d[x] = u8((pix + 1<<(Bits-1)) >> Bits) + c = c[{{$n}}:] + } + di += dp + si += sp + } +} + +func v8scale{{$n}}Go(dst, src []byte, cof, off []int16, + taps, width, height, dp, sp int) { + di := 0 + for _, yoff := range off[:height] { + src = src[sp*int(yoff):] + d := dst[di:] + for x := range d[:width] { + pix:={{range $i, $_ := $tab}}{{if gt $i 0}} + + {{end}}int(src[sp*{{$i}}+x]) * int(cof[{{$i}}]){{end}} + d[x] = u8((pix + 1<<(Bits-1)) >> Bits) + } + cof = cof[{{$n}}:] + di += dp + } +} +{{end}} diff --git a/vendor/github.com/bamiaux/rez/hscalers_amd64.s b/vendor/github.com/bamiaux/rez/hscalers_amd64.s new file mode 100644 index 00000000..0b018eaf --- /dev/null +++ b/vendor/github.com/bamiaux/rez/hscalers_amd64.s @@ -0,0 +1,1440 @@ +// Copyright 2014 Benoît Amiaux. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. 
+ +// This file is auto-generated - do not modify + +DATA zero_0<>+0x00(SB)/8, $0x0000000000000000 +DATA zero_0<>+0x08(SB)/8, $0x0000000000000000 +GLOBL zero_0<>(SB), 8, $16 +DATA hbits_1<>+0x00(SB)/8, $0x0000200000002000 +DATA hbits_1<>+0x08(SB)/8, $0x0000200000002000 +GLOBL hbits_1<>(SB), 8, $16 +DATA u8max_2<>+0x00(SB)/8, $0x00000000000000FF +DATA u8max_2<>+0x08(SB)/8, $0x00000000000000FF +GLOBL u8max_2<>(SB), 8, $16 + +TEXT ·h8scale2Amd64(SB),4,$40-136 + MOVQ dp+120(FP), BX + MOVQ width+104(FP), CX + MOVQ CX, DX + SUBQ CX, BX + SHRQ $4, CX + ANDQ $15, DX + MOVQ BX, dstoff+-32(SP) + MOVQ CX, simdroll+-8(SP) + MOVQ DX, asmroll+-16(SP) + MOVQ src+24(FP), AX + MOVQ AX, srcref+-24(SP) + MOVQ taps+96(FP), DX + SUBQ $2, DX + PXOR X15, X15 + MOVO hbits_1<>(SB), X14 + MOVQ src+24(FP), SI + MOVQ dst+0(FP), DI +yloop_0: + MOVQ off+72(FP), BX + MOVQ cof+48(FP), BP + MOVQ simdroll+-8(SP), CX + ORQ CX, CX + JE nosimdloop_3 +simdloop_1: + MOVWQSX (BX), R8 + MOVWQSX 2(BX), R9 + MOVWQSX 4(BX), R10 + MOVWQSX 6(BX), R11 + PINSRW $0, (SI)(R8*1), X0 + PINSRW $1, (SI)(R9*1), X0 + PINSRW $2, (SI)(R10*1), X0 + PINSRW $3, (SI)(R11*1), X0 + MOVWQSX 8(BX), R8 + MOVWQSX 10(BX), R9 + MOVWQSX 12(BX), R10 + MOVWQSX 14(BX), R11 + PINSRW $0, (SI)(R8*1), X1 + PINSRW $1, (SI)(R9*1), X1 + PINSRW $2, (SI)(R10*1), X1 + PINSRW $3, (SI)(R11*1), X1 + MOVWQSX 16(BX), R8 + MOVWQSX 18(BX), R9 + MOVWQSX 20(BX), R10 + MOVWQSX 22(BX), R11 + PINSRW $0, (SI)(R8*1), X2 + PINSRW $1, (SI)(R9*1), X2 + PINSRW $2, (SI)(R10*1), X2 + PINSRW $3, (SI)(R11*1), X2 + MOVWQSX 24(BX), R8 + MOVWQSX 26(BX), R9 + MOVWQSX 28(BX), R10 + MOVWQSX 30(BX), R11 + PINSRW $0, (SI)(R8*1), X3 + PINSRW $1, (SI)(R9*1), X3 + PINSRW $2, (SI)(R10*1), X3 + PINSRW $3, (SI)(R11*1), X3 + ADDQ $32, BX + PUNPCKLBW X15, X0 + PMADDWL (BP), X0 + PUNPCKLBW X15, X1 + PMADDWL 16(BP), X1 + PUNPCKLBW X15, X2 + PMADDWL 32(BP), X2 + PUNPCKLBW X15, X3 + PMADDWL 48(BP), X3 + ADDQ $64, BP + PADDL X14, X0 + PADDL X14, X1 + PADDL X14, X2 + PADDL X14, X3 + PSRAL 
$14, X0 + PSRAL $14, X1 + PSRAL $14, X2 + PSRAL $14, X3 + PACKSSLW X1, X0 + PACKSSLW X3, X2 + PACKUSWB X2, X0 + MOVOU X0, (DI) + ADDQ $16, DI + SUBQ $1, CX + JNE simdloop_1 +nosimdloop_3: + MOVQ asmroll+-16(SP), CX + ORQ CX, CX + JE end_4 +asmloop_2: + MOVWQSX (BX), DX + MOVBQZX (SI)(DX*1), AX + MOVWQSX (BP), DX + IMULQ DX + MOVQ AX, sum+-40(SP) + MOVWQSX (BX), DX + MOVBQZX 1(SI)(DX*1), AX + MOVWQSX 2(BP), DX + IMULQ DX + ADDQ $4, BP + ADDQ sum+-40(SP), AX + ADDQ $8192, AX + CMOVQLT zero_0<>(SB), AX + SHRQ $14, AX + CMPQ u8max_2<>(SB), AX + CMOVQLT u8max_2<>(SB), AX + ADDQ $2, BX + MOVB AL, (DI) + ADDQ $1, DI + SUBQ $1, CX + JNE asmloop_2 +end_4: + MOVQ srcref+-24(SP), SI + ADDQ dstoff+-32(SP), DI + ADDQ sp+128(FP), SI + MOVQ SI, srcref+-24(SP) + SUBQ $1, height+112(FP) + JNE yloop_0 + RET + +TEXT ·h8scale4Amd64(SB),4,$40-136 + MOVQ dp+120(FP), BX + MOVQ width+104(FP), CX + MOVQ CX, DX + SUBQ CX, BX + SHRQ $4, CX + ANDQ $15, DX + MOVQ BX, dstoff+-32(SP) + MOVQ CX, simdroll+-8(SP) + MOVQ DX, asmroll+-16(SP) + MOVQ src+24(FP), AX + MOVQ AX, srcref+-24(SP) + MOVQ taps+96(FP), DX + SUBQ $2, DX + PXOR X15, X15 + MOVO hbits_1<>(SB), X14 + MOVQ src+24(FP), SI + MOVQ dst+0(FP), DI +yloop_5: + MOVQ off+72(FP), BX + MOVQ cof+48(FP), BP + MOVQ simdroll+-8(SP), CX + ORQ CX, CX + JE nosimdloop_8 +simdloop_6: + MOVWQSX (BX), AX + MOVWQSX 2(BX), DX + MOVL (SI)(AX*1), X0 + MOVL (SI)(DX*1), X8 + MOVWQSX 4(BX), AX + MOVWQSX 6(BX), DX + MOVL (SI)(AX*1), X1 + MOVL (SI)(DX*1), X9 + PUNPCKLLQ X8, X0 + PUNPCKLLQ X9, X1 + MOVWQSX 8(BX), AX + MOVWQSX 10(BX), DX + MOVL (SI)(AX*1), X2 + MOVL (SI)(DX*1), X10 + MOVWQSX 12(BX), AX + MOVWQSX 14(BX), DX + MOVL (SI)(AX*1), X3 + MOVL (SI)(DX*1), X11 + PUNPCKLLQ X10, X2 + PUNPCKLLQ X11, X3 + MOVWQSX 16(BX), AX + MOVWQSX 18(BX), DX + MOVL (SI)(AX*1), X4 + MOVL (SI)(DX*1), X12 + MOVWQSX 20(BX), AX + MOVWQSX 22(BX), DX + MOVL (SI)(AX*1), X5 + MOVL (SI)(DX*1), X13 + PUNPCKLLQ X12, X4 + PUNPCKLLQ X13, X5 + MOVWQSX 24(BX), AX + MOVWQSX 26(BX), DX + MOVL 
(SI)(AX*1), X6 + MOVL (SI)(DX*1), X8 + MOVWQSX 28(BX), AX + MOVWQSX 30(BX), DX + MOVL (SI)(AX*1), X7 + MOVL (SI)(DX*1), X9 + PUNPCKLLQ X8, X6 + PUNPCKLLQ X9, X7 + ADDQ $32, BX + PUNPCKLBW X15, X0 + PMADDWL (BP), X0 + PUNPCKLBW X15, X1 + PMADDWL 16(BP), X1 + PUNPCKLBW X15, X2 + PMADDWL 32(BP), X2 + PUNPCKLBW X15, X3 + PMADDWL 48(BP), X3 + MOVO X0, X10 + MOVO X2, X11 + SHUFPS $221, X1, X10 + SHUFPS $221, X3, X11 + SHUFPS $136, X1, X0 + SHUFPS $136, X3, X2 + PADDL X10, X0 + PADDL X11, X2 + PUNPCKLBW X15, X4 + PMADDWL 64(BP), X4 + PUNPCKLBW X15, X5 + PMADDWL 80(BP), X5 + PUNPCKLBW X15, X6 + PMADDWL 96(BP), X6 + PUNPCKLBW X15, X7 + PMADDWL 112(BP), X7 + MOVO X4, X12 + MOVO X6, X13 + SHUFPS $221, X5, X12 + SHUFPS $221, X7, X13 + SHUFPS $136, X5, X4 + SHUFPS $136, X7, X6 + PADDL X12, X4 + PADDL X13, X6 + ADDQ $128, BP + PADDL X14, X0 + PADDL X14, X2 + PADDL X14, X4 + PADDL X14, X6 + PSRAL $14, X0 + PSRAL $14, X2 + PSRAL $14, X4 + PSRAL $14, X6 + PACKSSLW X2, X0 + PACKSSLW X6, X4 + PACKUSWB X4, X0 + MOVOU X0, (DI) + ADDQ $16, DI + SUBQ $1, CX + JNE simdloop_6 +nosimdloop_8: + MOVQ asmroll+-16(SP), CX + ORQ CX, CX + JE end_9 +asmloop_7: + MOVWQSX (BX), DX + MOVBQZX (SI)(DX*1), AX + MOVWQSX (BP), DX + IMULQ DX + MOVQ AX, sum+-40(SP) + MOVWQSX (BX), DX + MOVBQZX 1(SI)(DX*1), AX + MOVWQSX 2(BP), DX + IMULQ DX + ADDQ AX, sum+-40(SP) + MOVWQSX (BX), DX + MOVBQZX 2(SI)(DX*1), AX + MOVWQSX 4(BP), DX + IMULQ DX + ADDQ AX, sum+-40(SP) + MOVWQSX (BX), DX + MOVBQZX 3(SI)(DX*1), AX + MOVWQSX 6(BP), DX + IMULQ DX + ADDQ $8, BP + ADDQ sum+-40(SP), AX + ADDQ $8192, AX + CMOVQLT zero_0<>(SB), AX + SHRQ $14, AX + CMPQ u8max_2<>(SB), AX + CMOVQLT u8max_2<>(SB), AX + ADDQ $2, BX + MOVB AL, (DI) + ADDQ $1, DI + SUBQ $1, CX + JNE asmloop_7 +end_9: + MOVQ srcref+-24(SP), SI + ADDQ dstoff+-32(SP), DI + ADDQ sp+128(FP), SI + MOVQ SI, srcref+-24(SP) + SUBQ $1, height+112(FP) + JNE yloop_5 + RET + +TEXT ·h8scale8Amd64(SB),4,$40-136 + MOVQ dp+120(FP), BX + MOVQ width+104(FP), CX + MOVQ CX, DX + SUBQ 
CX, BX + SHRQ $4, CX + ANDQ $15, DX + MOVQ BX, dstoff+-32(SP) + MOVQ CX, simdroll+-8(SP) + MOVQ DX, asmroll+-16(SP) + MOVQ src+24(FP), AX + MOVQ AX, srcref+-24(SP) + MOVQ taps+96(FP), DX + SUBQ $2, DX + PXOR X15, X15 + MOVO hbits_1<>(SB), X14 + MOVQ src+24(FP), SI + MOVQ dst+0(FP), DI +yloop_10: + MOVQ off+72(FP), BX + MOVQ cof+48(FP), BP + MOVQ simdroll+-8(SP), CX + ORQ CX, CX + JE nosimdloop_13 +simdloop_11: + MOVWQSX (BX), AX + MOVQ (SI)(AX*1), X0 + MOVWQSX 2(BX), DX + MOVQ (SI)(DX*1), X1 + MOVWQSX 4(BX), AX + MOVQ (SI)(AX*1), X2 + MOVWQSX 6(BX), DX + MOVQ (SI)(DX*1), X3 + MOVWQSX 8(BX), AX + MOVQ (SI)(AX*1), X4 + MOVWQSX 10(BX), DX + MOVQ (SI)(DX*1), X5 + MOVWQSX 12(BX), AX + MOVQ (SI)(AX*1), X6 + MOVWQSX 14(BX), DX + MOVQ (SI)(DX*1), X7 + MOVWQSX 16(BX), AX + MOVQ (SI)(AX*1), X8 + MOVWQSX 18(BX), DX + MOVQ (SI)(DX*1), X9 + MOVWQSX 20(BX), AX + MOVQ (SI)(AX*1), X10 + MOVWQSX 22(BX), DX + MOVQ (SI)(DX*1), X11 + PUNPCKLBW X15, X0 + PMADDWL (BP), X0 + PUNPCKLBW X15, X1 + PMADDWL 16(BP), X1 + PUNPCKLBW X15, X2 + PMADDWL 32(BP), X2 + PUNPCKLBW X15, X3 + PMADDWL 48(BP), X3 + MOVO X0, X12 + MOVO X2, X13 + PUNPCKLQDQ X1, X0 + PUNPCKHQDQ X1, X12 + PADDL X12, X0 + PUNPCKLQDQ X3, X2 + PUNPCKHQDQ X3, X13 + PADDL X13, X2 + MOVO X0, X12 + SHUFPS $136, X2, X0 + SHUFPS $221, X2, X12 + PADDL X12, X0 + PUNPCKLBW X15, X4 + PMADDWL 64(BP), X4 + PUNPCKLBW X15, X5 + PMADDWL 80(BP), X5 + PUNPCKLBW X15, X6 + PMADDWL 96(BP), X6 + PUNPCKLBW X15, X7 + PMADDWL 112(BP), X7 + MOVO X4, X1 + MOVO X6, X2 + PUNPCKLQDQ X5, X4 + PUNPCKHQDQ X5, X1 + PADDL X1, X4 + PUNPCKLQDQ X7, X6 + PUNPCKHQDQ X7, X2 + PADDL X2, X6 + MOVO X4, X1 + SHUFPS $136, X6, X4 + SHUFPS $221, X6, X1 + PADDL X1, X4 + MOVWQSX 24(BX), AX + MOVQ (SI)(AX*1), X1 + MOVWQSX 26(BX), DX + MOVQ (SI)(DX*1), X2 + MOVWQSX 28(BX), AX + MOVQ (SI)(AX*1), X3 + MOVWQSX 30(BX), DX + MOVQ (SI)(DX*1), X5 + ADDQ $32, BX + PUNPCKLBW X15, X8 + PMADDWL 128(BP), X8 + PUNPCKLBW X15, X9 + PMADDWL 144(BP), X9 + PUNPCKLBW X15, X10 + PMADDWL 160(BP), X10 
+ PUNPCKLBW X15, X11 + PMADDWL 176(BP), X11 + MOVO X8, X12 + MOVO X10, X13 + PUNPCKLQDQ X9, X8 + PUNPCKHQDQ X9, X12 + PADDL X12, X8 + PUNPCKLQDQ X11, X10 + PUNPCKHQDQ X11, X13 + PADDL X13, X10 + MOVO X8, X12 + SHUFPS $136, X10, X8 + SHUFPS $221, X10, X12 + PADDL X12, X8 + PUNPCKLBW X15, X1 + PMADDWL 192(BP), X1 + PUNPCKLBW X15, X2 + PMADDWL 208(BP), X2 + PUNPCKLBW X15, X3 + PMADDWL 224(BP), X3 + PUNPCKLBW X15, X5 + PMADDWL 240(BP), X5 + MOVO X1, X10 + MOVO X3, X11 + PUNPCKLQDQ X2, X1 + PUNPCKHQDQ X2, X10 + PADDL X10, X1 + PUNPCKLQDQ X5, X3 + PUNPCKHQDQ X5, X11 + PADDL X11, X3 + MOVO X1, X10 + SHUFPS $136, X3, X1 + SHUFPS $221, X3, X10 + PADDL X10, X1 + ADDQ $256, BP + PADDL X14, X0 + PADDL X14, X4 + PADDL X14, X8 + PADDL X14, X1 + PSRAL $14, X0 + PSRAL $14, X4 + PSRAL $14, X8 + PSRAL $14, X1 + PACKSSLW X4, X0 + PACKSSLW X1, X8 + PACKUSWB X8, X0 + MOVOU X0, (DI) + ADDQ $16, DI + SUBQ $1, CX + JNE simdloop_11 +nosimdloop_13: + MOVQ asmroll+-16(SP), CX + ORQ CX, CX + JE end_14 +asmloop_12: + MOVWQSX (BX), DX + MOVBQZX (SI)(DX*1), AX + MOVWQSX (BP), DX + IMULQ DX + MOVQ AX, sum+-40(SP) + MOVWQSX (BX), DX + MOVBQZX 1(SI)(DX*1), AX + MOVWQSX 2(BP), DX + IMULQ DX + ADDQ AX, sum+-40(SP) + MOVWQSX (BX), DX + MOVBQZX 2(SI)(DX*1), AX + MOVWQSX 4(BP), DX + IMULQ DX + ADDQ AX, sum+-40(SP) + MOVWQSX (BX), DX + MOVBQZX 3(SI)(DX*1), AX + MOVWQSX 6(BP), DX + IMULQ DX + ADDQ AX, sum+-40(SP) + MOVWQSX (BX), DX + MOVBQZX 4(SI)(DX*1), AX + MOVWQSX 8(BP), DX + IMULQ DX + ADDQ AX, sum+-40(SP) + MOVWQSX (BX), DX + MOVBQZX 5(SI)(DX*1), AX + MOVWQSX 10(BP), DX + IMULQ DX + ADDQ AX, sum+-40(SP) + MOVWQSX (BX), DX + MOVBQZX 6(SI)(DX*1), AX + MOVWQSX 12(BP), DX + IMULQ DX + ADDQ AX, sum+-40(SP) + MOVWQSX (BX), DX + MOVBQZX 7(SI)(DX*1), AX + MOVWQSX 14(BP), DX + IMULQ DX + ADDQ $16, BP + ADDQ sum+-40(SP), AX + ADDQ $8192, AX + CMOVQLT zero_0<>(SB), AX + SHRQ $14, AX + CMPQ u8max_2<>(SB), AX + CMOVQLT u8max_2<>(SB), AX + ADDQ $2, BX + MOVB AL, (DI) + ADDQ $1, DI + SUBQ $1, CX + JNE asmloop_12 
+end_14: + MOVQ srcref+-24(SP), SI + ADDQ dstoff+-32(SP), DI + ADDQ sp+128(FP), SI + MOVQ SI, srcref+-24(SP) + SUBQ $1, height+112(FP) + JNE yloop_10 + RET + +TEXT ·h8scale10Amd64(SB),4,$40-136 + MOVQ dp+120(FP), BX + MOVQ width+104(FP), CX + MOVQ CX, DX + SUBQ CX, BX + SHRQ $4, CX + ANDQ $15, DX + MOVQ BX, dstoff+-32(SP) + MOVQ CX, simdroll+-8(SP) + MOVQ DX, asmroll+-16(SP) + MOVQ src+24(FP), AX + MOVQ AX, srcref+-24(SP) + MOVQ taps+96(FP), DX + SUBQ $2, DX + PXOR X15, X15 + MOVO hbits_1<>(SB), X14 + MOVQ src+24(FP), SI + MOVQ dst+0(FP), DI +yloop_15: + MOVQ off+72(FP), BX + MOVQ cof+48(FP), BP + MOVQ simdroll+-8(SP), CX + ORQ CX, CX + JE nosimdloop_18 +simdloop_16: + MOVWQSX (BX), R8 + MOVWQSX 2(BX), R9 + MOVWQSX 4(BX), R10 + MOVWQSX 6(BX), R11 + PINSRW $0, (SI)(R8*1), X0 + PINSRW $1, (SI)(R9*1), X0 + PINSRW $2, (SI)(R10*1), X0 + PINSRW $3, (SI)(R11*1), X0 + MOVWQSX 8(BX), R8 + MOVWQSX 10(BX), R9 + MOVWQSX 12(BX), R10 + MOVWQSX 14(BX), R11 + PINSRW $0, (SI)(R8*1), X1 + PINSRW $1, (SI)(R9*1), X1 + PINSRW $2, (SI)(R10*1), X1 + PINSRW $3, (SI)(R11*1), X1 + MOVWQSX 16(BX), R8 + MOVWQSX 18(BX), R9 + MOVWQSX 20(BX), R10 + MOVWQSX 22(BX), R11 + PINSRW $0, (SI)(R8*1), X2 + PINSRW $1, (SI)(R9*1), X2 + PINSRW $2, (SI)(R10*1), X2 + PINSRW $3, (SI)(R11*1), X2 + MOVWQSX 24(BX), R8 + MOVWQSX 26(BX), R9 + MOVWQSX 28(BX), R10 + MOVWQSX 30(BX), R11 + PINSRW $0, (SI)(R8*1), X3 + PINSRW $1, (SI)(R9*1), X3 + PINSRW $2, (SI)(R10*1), X3 + PINSRW $3, (SI)(R11*1), X3 + ADDQ $2, SI + PUNPCKLBW X15, X0 + PMADDWL (BP), X0 + PUNPCKLBW X15, X1 + PMADDWL 16(BP), X1 + PUNPCKLBW X15, X2 + PMADDWL 32(BP), X2 + PUNPCKLBW X15, X3 + PMADDWL 48(BP), X3 + ADDQ $64, BP + MOVWQSX (BX), R8 + MOVWQSX 2(BX), R9 + MOVWQSX 4(BX), R10 + MOVWQSX 6(BX), R11 + PINSRW $0, (SI)(R8*1), X4 + PINSRW $1, (SI)(R9*1), X4 + PINSRW $2, (SI)(R10*1), X4 + PINSRW $3, (SI)(R11*1), X4 + MOVWQSX 8(BX), R8 + MOVWQSX 10(BX), R9 + MOVWQSX 12(BX), R10 + MOVWQSX 14(BX), R11 + PINSRW $0, (SI)(R8*1), X5 + PINSRW $1, (SI)(R9*1), X5 + 
PINSRW $2, (SI)(R10*1), X5 + PINSRW $3, (SI)(R11*1), X5 + MOVWQSX 16(BX), R8 + MOVWQSX 18(BX), R9 + MOVWQSX 20(BX), R10 + MOVWQSX 22(BX), R11 + PINSRW $0, (SI)(R8*1), X6 + PINSRW $1, (SI)(R9*1), X6 + PINSRW $2, (SI)(R10*1), X6 + PINSRW $3, (SI)(R11*1), X6 + MOVWQSX 24(BX), R8 + MOVWQSX 26(BX), R9 + MOVWQSX 28(BX), R10 + MOVWQSX 30(BX), R11 + PINSRW $0, (SI)(R8*1), X7 + PINSRW $1, (SI)(R9*1), X7 + PINSRW $2, (SI)(R10*1), X7 + PINSRW $3, (SI)(R11*1), X7 + ADDQ $2, SI + PUNPCKLBW X15, X4 + PMADDWL (BP), X4 + PUNPCKLBW X15, X5 + PMADDWL 16(BP), X5 + PUNPCKLBW X15, X6 + PMADDWL 32(BP), X6 + PUNPCKLBW X15, X7 + PMADDWL 48(BP), X7 + ADDQ $64, BP + PADDL X4, X0 + PADDL X5, X1 + PADDL X6, X2 + PADDL X7, X3 + MOVWQSX (BX), R8 + MOVWQSX 2(BX), R9 + MOVWQSX 4(BX), R10 + MOVWQSX 6(BX), R11 + PINSRW $0, (SI)(R8*1), X4 + PINSRW $1, (SI)(R9*1), X4 + PINSRW $2, (SI)(R10*1), X4 + PINSRW $3, (SI)(R11*1), X4 + MOVWQSX 8(BX), R8 + MOVWQSX 10(BX), R9 + MOVWQSX 12(BX), R10 + MOVWQSX 14(BX), R11 + PINSRW $0, (SI)(R8*1), X5 + PINSRW $1, (SI)(R9*1), X5 + PINSRW $2, (SI)(R10*1), X5 + PINSRW $3, (SI)(R11*1), X5 + MOVWQSX 16(BX), R8 + MOVWQSX 18(BX), R9 + MOVWQSX 20(BX), R10 + MOVWQSX 22(BX), R11 + PINSRW $0, (SI)(R8*1), X6 + PINSRW $1, (SI)(R9*1), X6 + PINSRW $2, (SI)(R10*1), X6 + PINSRW $3, (SI)(R11*1), X6 + MOVWQSX 24(BX), R8 + MOVWQSX 26(BX), R9 + MOVWQSX 28(BX), R10 + MOVWQSX 30(BX), R11 + PINSRW $0, (SI)(R8*1), X7 + PINSRW $1, (SI)(R9*1), X7 + PINSRW $2, (SI)(R10*1), X7 + PINSRW $3, (SI)(R11*1), X7 + ADDQ $2, SI + PUNPCKLBW X15, X4 + PMADDWL (BP), X4 + PUNPCKLBW X15, X5 + PMADDWL 16(BP), X5 + PUNPCKLBW X15, X6 + PMADDWL 32(BP), X6 + PUNPCKLBW X15, X7 + PMADDWL 48(BP), X7 + ADDQ $64, BP + PADDL X4, X0 + PADDL X5, X1 + PADDL X6, X2 + PADDL X7, X3 + MOVWQSX (BX), R8 + MOVWQSX 2(BX), R9 + MOVWQSX 4(BX), R10 + MOVWQSX 6(BX), R11 + PINSRW $0, (SI)(R8*1), X4 + PINSRW $1, (SI)(R9*1), X4 + PINSRW $2, (SI)(R10*1), X4 + PINSRW $3, (SI)(R11*1), X4 + MOVWQSX 8(BX), R8 + MOVWQSX 10(BX), R9 + MOVWQSX 
12(BX), R10 + MOVWQSX 14(BX), R11 + PINSRW $0, (SI)(R8*1), X5 + PINSRW $1, (SI)(R9*1), X5 + PINSRW $2, (SI)(R10*1), X5 + PINSRW $3, (SI)(R11*1), X5 + MOVWQSX 16(BX), R8 + MOVWQSX 18(BX), R9 + MOVWQSX 20(BX), R10 + MOVWQSX 22(BX), R11 + PINSRW $0, (SI)(R8*1), X6 + PINSRW $1, (SI)(R9*1), X6 + PINSRW $2, (SI)(R10*1), X6 + PINSRW $3, (SI)(R11*1), X6 + MOVWQSX 24(BX), R8 + MOVWQSX 26(BX), R9 + MOVWQSX 28(BX), R10 + MOVWQSX 30(BX), R11 + PINSRW $0, (SI)(R8*1), X7 + PINSRW $1, (SI)(R9*1), X7 + PINSRW $2, (SI)(R10*1), X7 + PINSRW $3, (SI)(R11*1), X7 + ADDQ $2, SI + PUNPCKLBW X15, X4 + PMADDWL (BP), X4 + PUNPCKLBW X15, X5 + PMADDWL 16(BP), X5 + PUNPCKLBW X15, X6 + PMADDWL 32(BP), X6 + PUNPCKLBW X15, X7 + PMADDWL 48(BP), X7 + ADDQ $64, BP + PADDL X4, X0 + PADDL X5, X1 + PADDL X6, X2 + PADDL X7, X3 + MOVWQSX (BX), R8 + MOVWQSX 2(BX), R9 + MOVWQSX 4(BX), R10 + MOVWQSX 6(BX), R11 + PINSRW $0, (SI)(R8*1), X4 + PINSRW $1, (SI)(R9*1), X4 + PINSRW $2, (SI)(R10*1), X4 + PINSRW $3, (SI)(R11*1), X4 + MOVWQSX 8(BX), R8 + MOVWQSX 10(BX), R9 + MOVWQSX 12(BX), R10 + MOVWQSX 14(BX), R11 + PINSRW $0, (SI)(R8*1), X5 + PINSRW $1, (SI)(R9*1), X5 + PINSRW $2, (SI)(R10*1), X5 + PINSRW $3, (SI)(R11*1), X5 + MOVWQSX 16(BX), R8 + MOVWQSX 18(BX), R9 + MOVWQSX 20(BX), R10 + MOVWQSX 22(BX), R11 + PINSRW $0, (SI)(R8*1), X6 + PINSRW $1, (SI)(R9*1), X6 + PINSRW $2, (SI)(R10*1), X6 + PINSRW $3, (SI)(R11*1), X6 + MOVWQSX 24(BX), R8 + MOVWQSX 26(BX), R9 + MOVWQSX 28(BX), R10 + MOVWQSX 30(BX), R11 + PINSRW $0, (SI)(R8*1), X7 + PINSRW $1, (SI)(R9*1), X7 + PINSRW $2, (SI)(R10*1), X7 + PINSRW $3, (SI)(R11*1), X7 + ADDQ $2, SI + PUNPCKLBW X15, X4 + PMADDWL (BP), X4 + PUNPCKLBW X15, X5 + PMADDWL 16(BP), X5 + PUNPCKLBW X15, X6 + PMADDWL 32(BP), X6 + PUNPCKLBW X15, X7 + PMADDWL 48(BP), X7 + ADDQ $64, BP + PADDL X4, X0 + PADDL X5, X1 + PADDL X6, X2 + PADDL X7, X3 + MOVQ taps+96(FP), AX + SUBQ AX, SI + ADDQ $32, BX + PADDL X14, X0 + PADDL X14, X1 + PADDL X14, X2 + PADDL X14, X3 + PSRAL $14, X0 + PSRAL $14, X1 + PSRAL 
$14, X2 + PSRAL $14, X3 + PACKSSLW X1, X0 + PACKSSLW X3, X2 + PACKUSWB X2, X0 + MOVOU X0, (DI) + ADDQ $16, DI + SUBQ $1, CX + JNE simdloop_16 +nosimdloop_18: + MOVQ asmroll+-16(SP), CX + ORQ CX, CX + JE end_19 +asmloop_17: + MOVWQSX (BX), DX + MOVBQZX (SI)(DX*1), AX + MOVWQSX (BP), DX + IMULQ DX + MOVQ AX, sum+-40(SP) + MOVWQSX (BX), DX + MOVBQZX 1(SI)(DX*1), AX + MOVWQSX 2(BP), DX + IMULQ DX + ADDQ AX, sum+-40(SP) + MOVWQSX (BX), DX + MOVBQZX 2(SI)(DX*1), AX + MOVWQSX 4(BP), DX + IMULQ DX + ADDQ AX, sum+-40(SP) + MOVWQSX (BX), DX + MOVBQZX 3(SI)(DX*1), AX + MOVWQSX 6(BP), DX + IMULQ DX + ADDQ AX, sum+-40(SP) + MOVWQSX (BX), DX + MOVBQZX 4(SI)(DX*1), AX + MOVWQSX 8(BP), DX + IMULQ DX + ADDQ AX, sum+-40(SP) + MOVWQSX (BX), DX + MOVBQZX 5(SI)(DX*1), AX + MOVWQSX 10(BP), DX + IMULQ DX + ADDQ AX, sum+-40(SP) + MOVWQSX (BX), DX + MOVBQZX 6(SI)(DX*1), AX + MOVWQSX 12(BP), DX + IMULQ DX + ADDQ AX, sum+-40(SP) + MOVWQSX (BX), DX + MOVBQZX 7(SI)(DX*1), AX + MOVWQSX 14(BP), DX + IMULQ DX + ADDQ AX, sum+-40(SP) + MOVWQSX (BX), DX + MOVBQZX 8(SI)(DX*1), AX + MOVWQSX 16(BP), DX + IMULQ DX + ADDQ AX, sum+-40(SP) + MOVWQSX (BX), DX + MOVBQZX 9(SI)(DX*1), AX + MOVWQSX 18(BP), DX + IMULQ DX + ADDQ $20, BP + ADDQ sum+-40(SP), AX + ADDQ $8192, AX + CMOVQLT zero_0<>(SB), AX + SHRQ $14, AX + CMPQ u8max_2<>(SB), AX + CMOVQLT u8max_2<>(SB), AX + ADDQ $2, BX + MOVB AL, (DI) + ADDQ $1, DI + SUBQ $1, CX + JNE asmloop_17 +end_19: + MOVQ srcref+-24(SP), SI + ADDQ dstoff+-32(SP), DI + ADDQ sp+128(FP), SI + MOVQ SI, srcref+-24(SP) + SUBQ $1, height+112(FP) + JNE yloop_15 + RET + +TEXT ·h8scale12Amd64(SB),4,$40-136 + MOVQ dp+120(FP), BX + MOVQ width+104(FP), CX + MOVQ CX, DX + SUBQ CX, BX + SHRQ $4, CX + ANDQ $15, DX + MOVQ BX, dstoff+-32(SP) + MOVQ CX, simdroll+-8(SP) + MOVQ DX, asmroll+-16(SP) + MOVQ src+24(FP), AX + MOVQ AX, srcref+-24(SP) + MOVQ taps+96(FP), DX + SUBQ $2, DX + PXOR X15, X15 + MOVO hbits_1<>(SB), X14 + MOVQ src+24(FP), SI + MOVQ dst+0(FP), DI +yloop_20: + MOVQ off+72(FP), BX 
+ MOVQ cof+48(FP), BP + MOVQ simdroll+-8(SP), CX + ORQ CX, CX + JE nosimdloop_23 +simdloop_21: + MOVWQSX (BX), R8 + MOVWQSX 2(BX), R9 + MOVWQSX 4(BX), R10 + MOVWQSX 6(BX), R11 + PINSRW $0, (SI)(R8*1), X0 + PINSRW $1, (SI)(R9*1), X0 + PINSRW $2, (SI)(R10*1), X0 + PINSRW $3, (SI)(R11*1), X0 + MOVWQSX 8(BX), R8 + MOVWQSX 10(BX), R9 + MOVWQSX 12(BX), R10 + MOVWQSX 14(BX), R11 + PINSRW $0, (SI)(R8*1), X1 + PINSRW $1, (SI)(R9*1), X1 + PINSRW $2, (SI)(R10*1), X1 + PINSRW $3, (SI)(R11*1), X1 + MOVWQSX 16(BX), R8 + MOVWQSX 18(BX), R9 + MOVWQSX 20(BX), R10 + MOVWQSX 22(BX), R11 + PINSRW $0, (SI)(R8*1), X2 + PINSRW $1, (SI)(R9*1), X2 + PINSRW $2, (SI)(R10*1), X2 + PINSRW $3, (SI)(R11*1), X2 + MOVWQSX 24(BX), R8 + MOVWQSX 26(BX), R9 + MOVWQSX 28(BX), R10 + MOVWQSX 30(BX), R11 + PINSRW $0, (SI)(R8*1), X3 + PINSRW $1, (SI)(R9*1), X3 + PINSRW $2, (SI)(R10*1), X3 + PINSRW $3, (SI)(R11*1), X3 + ADDQ $2, SI + PUNPCKLBW X15, X0 + PMADDWL (BP), X0 + PUNPCKLBW X15, X1 + PMADDWL 16(BP), X1 + PUNPCKLBW X15, X2 + PMADDWL 32(BP), X2 + PUNPCKLBW X15, X3 + PMADDWL 48(BP), X3 + ADDQ $64, BP + MOVWQSX (BX), R8 + MOVWQSX 2(BX), R9 + MOVWQSX 4(BX), R10 + MOVWQSX 6(BX), R11 + PINSRW $0, (SI)(R8*1), X4 + PINSRW $1, (SI)(R9*1), X4 + PINSRW $2, (SI)(R10*1), X4 + PINSRW $3, (SI)(R11*1), X4 + MOVWQSX 8(BX), R8 + MOVWQSX 10(BX), R9 + MOVWQSX 12(BX), R10 + MOVWQSX 14(BX), R11 + PINSRW $0, (SI)(R8*1), X5 + PINSRW $1, (SI)(R9*1), X5 + PINSRW $2, (SI)(R10*1), X5 + PINSRW $3, (SI)(R11*1), X5 + MOVWQSX 16(BX), R8 + MOVWQSX 18(BX), R9 + MOVWQSX 20(BX), R10 + MOVWQSX 22(BX), R11 + PINSRW $0, (SI)(R8*1), X6 + PINSRW $1, (SI)(R9*1), X6 + PINSRW $2, (SI)(R10*1), X6 + PINSRW $3, (SI)(R11*1), X6 + MOVWQSX 24(BX), R8 + MOVWQSX 26(BX), R9 + MOVWQSX 28(BX), R10 + MOVWQSX 30(BX), R11 + PINSRW $0, (SI)(R8*1), X7 + PINSRW $1, (SI)(R9*1), X7 + PINSRW $2, (SI)(R10*1), X7 + PINSRW $3, (SI)(R11*1), X7 + ADDQ $2, SI + PUNPCKLBW X15, X4 + PMADDWL (BP), X4 + PUNPCKLBW X15, X5 + PMADDWL 16(BP), X5 + PUNPCKLBW X15, X6 + PMADDWL 
32(BP), X6 + PUNPCKLBW X15, X7 + PMADDWL 48(BP), X7 + ADDQ $64, BP + PADDL X4, X0 + PADDL X5, X1 + PADDL X6, X2 + PADDL X7, X3 + MOVWQSX (BX), R8 + MOVWQSX 2(BX), R9 + MOVWQSX 4(BX), R10 + MOVWQSX 6(BX), R11 + PINSRW $0, (SI)(R8*1), X4 + PINSRW $1, (SI)(R9*1), X4 + PINSRW $2, (SI)(R10*1), X4 + PINSRW $3, (SI)(R11*1), X4 + MOVWQSX 8(BX), R8 + MOVWQSX 10(BX), R9 + MOVWQSX 12(BX), R10 + MOVWQSX 14(BX), R11 + PINSRW $0, (SI)(R8*1), X5 + PINSRW $1, (SI)(R9*1), X5 + PINSRW $2, (SI)(R10*1), X5 + PINSRW $3, (SI)(R11*1), X5 + MOVWQSX 16(BX), R8 + MOVWQSX 18(BX), R9 + MOVWQSX 20(BX), R10 + MOVWQSX 22(BX), R11 + PINSRW $0, (SI)(R8*1), X6 + PINSRW $1, (SI)(R9*1), X6 + PINSRW $2, (SI)(R10*1), X6 + PINSRW $3, (SI)(R11*1), X6 + MOVWQSX 24(BX), R8 + MOVWQSX 26(BX), R9 + MOVWQSX 28(BX), R10 + MOVWQSX 30(BX), R11 + PINSRW $0, (SI)(R8*1), X7 + PINSRW $1, (SI)(R9*1), X7 + PINSRW $2, (SI)(R10*1), X7 + PINSRW $3, (SI)(R11*1), X7 + ADDQ $2, SI + PUNPCKLBW X15, X4 + PMADDWL (BP), X4 + PUNPCKLBW X15, X5 + PMADDWL 16(BP), X5 + PUNPCKLBW X15, X6 + PMADDWL 32(BP), X6 + PUNPCKLBW X15, X7 + PMADDWL 48(BP), X7 + ADDQ $64, BP + PADDL X4, X0 + PADDL X5, X1 + PADDL X6, X2 + PADDL X7, X3 + MOVWQSX (BX), R8 + MOVWQSX 2(BX), R9 + MOVWQSX 4(BX), R10 + MOVWQSX 6(BX), R11 + PINSRW $0, (SI)(R8*1), X4 + PINSRW $1, (SI)(R9*1), X4 + PINSRW $2, (SI)(R10*1), X4 + PINSRW $3, (SI)(R11*1), X4 + MOVWQSX 8(BX), R8 + MOVWQSX 10(BX), R9 + MOVWQSX 12(BX), R10 + MOVWQSX 14(BX), R11 + PINSRW $0, (SI)(R8*1), X5 + PINSRW $1, (SI)(R9*1), X5 + PINSRW $2, (SI)(R10*1), X5 + PINSRW $3, (SI)(R11*1), X5 + MOVWQSX 16(BX), R8 + MOVWQSX 18(BX), R9 + MOVWQSX 20(BX), R10 + MOVWQSX 22(BX), R11 + PINSRW $0, (SI)(R8*1), X6 + PINSRW $1, (SI)(R9*1), X6 + PINSRW $2, (SI)(R10*1), X6 + PINSRW $3, (SI)(R11*1), X6 + MOVWQSX 24(BX), R8 + MOVWQSX 26(BX), R9 + MOVWQSX 28(BX), R10 + MOVWQSX 30(BX), R11 + PINSRW $0, (SI)(R8*1), X7 + PINSRW $1, (SI)(R9*1), X7 + PINSRW $2, (SI)(R10*1), X7 + PINSRW $3, (SI)(R11*1), X7 + ADDQ $2, SI + PUNPCKLBW X15, X4 
+ PMADDWL (BP), X4 + PUNPCKLBW X15, X5 + PMADDWL 16(BP), X5 + PUNPCKLBW X15, X6 + PMADDWL 32(BP), X6 + PUNPCKLBW X15, X7 + PMADDWL 48(BP), X7 + ADDQ $64, BP + PADDL X4, X0 + PADDL X5, X1 + PADDL X6, X2 + PADDL X7, X3 + MOVWQSX (BX), R8 + MOVWQSX 2(BX), R9 + MOVWQSX 4(BX), R10 + MOVWQSX 6(BX), R11 + PINSRW $0, (SI)(R8*1), X4 + PINSRW $1, (SI)(R9*1), X4 + PINSRW $2, (SI)(R10*1), X4 + PINSRW $3, (SI)(R11*1), X4 + MOVWQSX 8(BX), R8 + MOVWQSX 10(BX), R9 + MOVWQSX 12(BX), R10 + MOVWQSX 14(BX), R11 + PINSRW $0, (SI)(R8*1), X5 + PINSRW $1, (SI)(R9*1), X5 + PINSRW $2, (SI)(R10*1), X5 + PINSRW $3, (SI)(R11*1), X5 + MOVWQSX 16(BX), R8 + MOVWQSX 18(BX), R9 + MOVWQSX 20(BX), R10 + MOVWQSX 22(BX), R11 + PINSRW $0, (SI)(R8*1), X6 + PINSRW $1, (SI)(R9*1), X6 + PINSRW $2, (SI)(R10*1), X6 + PINSRW $3, (SI)(R11*1), X6 + MOVWQSX 24(BX), R8 + MOVWQSX 26(BX), R9 + MOVWQSX 28(BX), R10 + MOVWQSX 30(BX), R11 + PINSRW $0, (SI)(R8*1), X7 + PINSRW $1, (SI)(R9*1), X7 + PINSRW $2, (SI)(R10*1), X7 + PINSRW $3, (SI)(R11*1), X7 + ADDQ $2, SI + PUNPCKLBW X15, X4 + PMADDWL (BP), X4 + PUNPCKLBW X15, X5 + PMADDWL 16(BP), X5 + PUNPCKLBW X15, X6 + PMADDWL 32(BP), X6 + PUNPCKLBW X15, X7 + PMADDWL 48(BP), X7 + ADDQ $64, BP + PADDL X4, X0 + PADDL X5, X1 + PADDL X6, X2 + PADDL X7, X3 + MOVWQSX (BX), R8 + MOVWQSX 2(BX), R9 + MOVWQSX 4(BX), R10 + MOVWQSX 6(BX), R11 + PINSRW $0, (SI)(R8*1), X4 + PINSRW $1, (SI)(R9*1), X4 + PINSRW $2, (SI)(R10*1), X4 + PINSRW $3, (SI)(R11*1), X4 + MOVWQSX 8(BX), R8 + MOVWQSX 10(BX), R9 + MOVWQSX 12(BX), R10 + MOVWQSX 14(BX), R11 + PINSRW $0, (SI)(R8*1), X5 + PINSRW $1, (SI)(R9*1), X5 + PINSRW $2, (SI)(R10*1), X5 + PINSRW $3, (SI)(R11*1), X5 + MOVWQSX 16(BX), R8 + MOVWQSX 18(BX), R9 + MOVWQSX 20(BX), R10 + MOVWQSX 22(BX), R11 + PINSRW $0, (SI)(R8*1), X6 + PINSRW $1, (SI)(R9*1), X6 + PINSRW $2, (SI)(R10*1), X6 + PINSRW $3, (SI)(R11*1), X6 + MOVWQSX 24(BX), R8 + MOVWQSX 26(BX), R9 + MOVWQSX 28(BX), R10 + MOVWQSX 30(BX), R11 + PINSRW $0, (SI)(R8*1), X7 + PINSRW $1, (SI)(R9*1), X7 + 
PINSRW $2, (SI)(R10*1), X7 + PINSRW $3, (SI)(R11*1), X7 + ADDQ $2, SI + PUNPCKLBW X15, X4 + PMADDWL (BP), X4 + PUNPCKLBW X15, X5 + PMADDWL 16(BP), X5 + PUNPCKLBW X15, X6 + PMADDWL 32(BP), X6 + PUNPCKLBW X15, X7 + PMADDWL 48(BP), X7 + ADDQ $64, BP + PADDL X4, X0 + PADDL X5, X1 + PADDL X6, X2 + PADDL X7, X3 + MOVQ taps+96(FP), AX + SUBQ AX, SI + ADDQ $32, BX + PADDL X14, X0 + PADDL X14, X1 + PADDL X14, X2 + PADDL X14, X3 + PSRAL $14, X0 + PSRAL $14, X1 + PSRAL $14, X2 + PSRAL $14, X3 + PACKSSLW X1, X0 + PACKSSLW X3, X2 + PACKUSWB X2, X0 + MOVOU X0, (DI) + ADDQ $16, DI + SUBQ $1, CX + JNE simdloop_21 +nosimdloop_23: + MOVQ asmroll+-16(SP), CX + ORQ CX, CX + JE end_24 +asmloop_22: + MOVWQSX (BX), DX + MOVBQZX (SI)(DX*1), AX + MOVWQSX (BP), DX + IMULQ DX + MOVQ AX, sum+-40(SP) + MOVWQSX (BX), DX + MOVBQZX 1(SI)(DX*1), AX + MOVWQSX 2(BP), DX + IMULQ DX + ADDQ AX, sum+-40(SP) + MOVWQSX (BX), DX + MOVBQZX 2(SI)(DX*1), AX + MOVWQSX 4(BP), DX + IMULQ DX + ADDQ AX, sum+-40(SP) + MOVWQSX (BX), DX + MOVBQZX 3(SI)(DX*1), AX + MOVWQSX 6(BP), DX + IMULQ DX + ADDQ AX, sum+-40(SP) + MOVWQSX (BX), DX + MOVBQZX 4(SI)(DX*1), AX + MOVWQSX 8(BP), DX + IMULQ DX + ADDQ AX, sum+-40(SP) + MOVWQSX (BX), DX + MOVBQZX 5(SI)(DX*1), AX + MOVWQSX 10(BP), DX + IMULQ DX + ADDQ AX, sum+-40(SP) + MOVWQSX (BX), DX + MOVBQZX 6(SI)(DX*1), AX + MOVWQSX 12(BP), DX + IMULQ DX + ADDQ AX, sum+-40(SP) + MOVWQSX (BX), DX + MOVBQZX 7(SI)(DX*1), AX + MOVWQSX 14(BP), DX + IMULQ DX + ADDQ AX, sum+-40(SP) + MOVWQSX (BX), DX + MOVBQZX 8(SI)(DX*1), AX + MOVWQSX 16(BP), DX + IMULQ DX + ADDQ AX, sum+-40(SP) + MOVWQSX (BX), DX + MOVBQZX 9(SI)(DX*1), AX + MOVWQSX 18(BP), DX + IMULQ DX + ADDQ AX, sum+-40(SP) + MOVWQSX (BX), DX + MOVBQZX 10(SI)(DX*1), AX + MOVWQSX 20(BP), DX + IMULQ DX + ADDQ AX, sum+-40(SP) + MOVWQSX (BX), DX + MOVBQZX 11(SI)(DX*1), AX + MOVWQSX 22(BP), DX + IMULQ DX + ADDQ $24, BP + ADDQ sum+-40(SP), AX + ADDQ $8192, AX + CMOVQLT zero_0<>(SB), AX + SHRQ $14, AX + CMPQ u8max_2<>(SB), AX + CMOVQLT 
u8max_2<>(SB), AX + ADDQ $2, BX + MOVB AL, (DI) + ADDQ $1, DI + SUBQ $1, CX + JNE asmloop_22 +end_24: + MOVQ srcref+-24(SP), SI + ADDQ dstoff+-32(SP), DI + ADDQ sp+128(FP), SI + MOVQ SI, srcref+-24(SP) + SUBQ $1, height+112(FP) + JNE yloop_20 + RET + +TEXT ·h8scaleNAmd64(SB),4,$64-136 + MOVQ dp+120(FP), BX + MOVQ width+104(FP), CX + MOVQ CX, DX + SUBQ CX, BX + SHRQ $4, CX + ANDQ $15, DX + MOVQ BX, dstoff+-32(SP) + MOVQ CX, simdroll+-8(SP) + MOVQ DX, asmroll+-16(SP) + MOVQ src+24(FP), AX + MOVQ AX, srcref+-24(SP) + MOVQ taps+96(FP), DX + SUBQ $2, DX + MOVQ DX, inner+-64(SP) + PXOR X15, X15 + MOVO hbits_1<>(SB), X14 + MOVQ src+24(FP), SI + MOVQ dst+0(FP), DI +yloop_25: + MOVQ off+72(FP), BX + MOVQ cof+48(FP), BP + MOVQ simdroll+-8(SP), CX + ORQ CX, CX + JE nosimdloop_28 +simdloop_26: + MOVWQSX (BX), R8 + MOVWQSX 2(BX), R9 + MOVWQSX 4(BX), R10 + MOVWQSX 6(BX), R11 + PINSRW $0, (SI)(R8*1), X0 + PINSRW $1, (SI)(R9*1), X0 + PINSRW $2, (SI)(R10*1), X0 + PINSRW $3, (SI)(R11*1), X0 + MOVWQSX 8(BX), R8 + MOVWQSX 10(BX), R9 + MOVWQSX 12(BX), R10 + MOVWQSX 14(BX), R11 + PINSRW $0, (SI)(R8*1), X1 + PINSRW $1, (SI)(R9*1), X1 + PINSRW $2, (SI)(R10*1), X1 + PINSRW $3, (SI)(R11*1), X1 + MOVWQSX 16(BX), R8 + MOVWQSX 18(BX), R9 + MOVWQSX 20(BX), R10 + MOVWQSX 22(BX), R11 + PINSRW $0, (SI)(R8*1), X2 + PINSRW $1, (SI)(R9*1), X2 + PINSRW $2, (SI)(R10*1), X2 + PINSRW $3, (SI)(R11*1), X2 + MOVWQSX 24(BX), R8 + MOVWQSX 26(BX), R9 + MOVWQSX 28(BX), R10 + MOVWQSX 30(BX), R11 + PINSRW $0, (SI)(R8*1), X3 + PINSRW $1, (SI)(R9*1), X3 + PINSRW $2, (SI)(R10*1), X3 + PINSRW $3, (SI)(R11*1), X3 + ADDQ $2, SI + PUNPCKLBW X15, X0 + PMADDWL (BP), X0 + PUNPCKLBW X15, X1 + PMADDWL 16(BP), X1 + PUNPCKLBW X15, X2 + PMADDWL 32(BP), X2 + PUNPCKLBW X15, X3 + PMADDWL 48(BP), X3 + ADDQ $64, BP + MOVQ DI, dstref+-48(SP) + MOVQ inner+-64(SP), DI +loop_30: + MOVWQSX (BX), R8 + MOVWQSX 2(BX), R9 + MOVWQSX 4(BX), R10 + MOVWQSX 6(BX), R11 + PINSRW $0, (SI)(R8*1), X4 + PINSRW $1, (SI)(R9*1), X4 + PINSRW $2, 
(SI)(R10*1), X4 + PINSRW $3, (SI)(R11*1), X4 + MOVWQSX 8(BX), R8 + MOVWQSX 10(BX), R9 + MOVWQSX 12(BX), R10 + MOVWQSX 14(BX), R11 + PINSRW $0, (SI)(R8*1), X5 + PINSRW $1, (SI)(R9*1), X5 + PINSRW $2, (SI)(R10*1), X5 + PINSRW $3, (SI)(R11*1), X5 + MOVWQSX 16(BX), R8 + MOVWQSX 18(BX), R9 + MOVWQSX 20(BX), R10 + MOVWQSX 22(BX), R11 + PINSRW $0, (SI)(R8*1), X6 + PINSRW $1, (SI)(R9*1), X6 + PINSRW $2, (SI)(R10*1), X6 + PINSRW $3, (SI)(R11*1), X6 + MOVWQSX 24(BX), R8 + MOVWQSX 26(BX), R9 + MOVWQSX 28(BX), R10 + MOVWQSX 30(BX), R11 + PINSRW $0, (SI)(R8*1), X7 + PINSRW $1, (SI)(R9*1), X7 + PINSRW $2, (SI)(R10*1), X7 + PINSRW $3, (SI)(R11*1), X7 + ADDQ $2, SI + PUNPCKLBW X15, X4 + PMADDWL (BP), X4 + PUNPCKLBW X15, X5 + PMADDWL 16(BP), X5 + PUNPCKLBW X15, X6 + PMADDWL 32(BP), X6 + PUNPCKLBW X15, X7 + PMADDWL 48(BP), X7 + ADDQ $64, BP + PADDL X4, X0 + PADDL X5, X1 + PADDL X6, X2 + PADDL X7, X3 + SUBQ $2, DI + JNE loop_30 + MOVQ dstref+-48(SP), DI + MOVQ taps+96(FP), AX + SUBQ AX, SI + ADDQ $32, BX + PADDL X14, X0 + PADDL X14, X1 + PADDL X14, X2 + PADDL X14, X3 + PSRAL $14, X0 + PSRAL $14, X1 + PSRAL $14, X2 + PSRAL $14, X3 + PACKSSLW X1, X0 + PACKSSLW X3, X2 + PACKUSWB X2, X0 + MOVOU X0, (DI) + ADDQ $16, DI + SUBQ $1, CX + JNE simdloop_26 +nosimdloop_28: + MOVQ asmroll+-16(SP), CX + ORQ CX, CX + JE end_29 +asmloop_27: + MOVWQSX (BX), DX + MOVBQZX (SI)(DX*1), AX + MOVWQSX (BP), DX + IMULQ DX + MOVQ AX, sum+-40(SP) + MOVQ inner+-64(SP), AX + MOVQ AX, count+-56(SP) +loop_31: + MOVWQSX (BX), DX + MOVBQZX 1(SI)(DX*1), AX + MOVWQSX 2(BP), DX + IMULQ DX + ADDQ $1, SI + ADDQ $2, BP + ADDQ AX, sum+-40(SP) + SUBQ $1, count+-56(SP) + JNE loop_31 + MOVWQSX (BX), DX + MOVBQZX 1(SI)(DX*1), AX + MOVWQSX 2(BP), DX + IMULQ DX + ADDQ $4, BP + SUBQ inner+-64(SP), SI + ADDQ sum+-40(SP), AX + ADDQ $8192, AX + CMOVQLT zero_0<>(SB), AX + SHRQ $14, AX + CMPQ u8max_2<>(SB), AX + CMOVQLT u8max_2<>(SB), AX + ADDQ $2, BX + MOVB AL, (DI) + ADDQ $1, DI + SUBQ $1, CX + JNE asmloop_27 +end_29: + MOVQ 
srcref+-24(SP), SI + ADDQ dstoff+-32(SP), DI + ADDQ sp+128(FP), SI + MOVQ SI, srcref+-24(SP) + SUBQ $1, height+112(FP) + JNE yloop_25 + RET diff --git a/vendor/github.com/bamiaux/rez/image.go b/vendor/github.com/bamiaux/rez/image.go new file mode 100644 index 00000000..66cb1312 --- /dev/null +++ b/vendor/github.com/bamiaux/rez/image.go @@ -0,0 +1,540 @@ +// Copyright 2013 Benoît Amiaux. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +//go:generate autoreadme -f -template=README.md.template + +/* +Package rez provides image resizing in pure Go and SIMD. + +Featuring: + - YCbCr, RGBA, NRGBA & Gray resizes + - YCbCr Chroma subsample ratio conversions + - Optional interlaced-aware resizes + - Parallel resizes + - SIMD optimisations on AMD64 + +The easiest way to use it is: + + err := Convert(output, input, NewBicubicFilter()) + +However, if you plan to convert video, where resize parameters are the same for +multiple images, the best way is: + + cfg, err := PrepareConversion(output, input) + converter, err := NewConverter(cfg, NewBicubicFilter()) + for i := 0; i < N; i++ { + err := converter.Convert(output[i], input[i]) + } + +Note that by default, images are resized in parallel with GOMAXPROCS slices. +Best performance is obtained when GOMAXPROCS is at least equal to your CPU +count. 
*/
package rez

import (
	"fmt"
	"image"
	"runtime"
	"sync"
)

// Converter is an interface that implements conversion between images.
// The implementation in this file accepts the image types recognised by
// inspect (*image.YCbCr, *image.RGBA, *image.NRGBA and *image.Gray); input and
// output must agree on interlacing, pack and plane count (see checkConversion).
type Converter interface {
	// Converts one image into another, applying any necessary colorspace
	// conversion and/or resizing
	// dst = destination image
	// src = source image
	// Result is undefined if src points to the same data as dst
	// Returns an error if the conversion fails
	Convert(dst, src image.Image) error
}

// ChromaRatio is a chroma subsampling ratio
type ChromaRatio int

const (
	// Ratio410 is 4:1:0
	Ratio410 ChromaRatio = iota
	// Ratio411 is 4:1:1
	Ratio411
	// Ratio420 is 4:2:0
	Ratio420
	// Ratio422 is 4:2:2
	Ratio422
	// Ratio440 is 4:4:0
	Ratio440
	// Ratio444 is 4:4:4
	Ratio444
)

// Descriptor describes the properties of an image.
// The helpers in this file set Pack to 4 for RGBA/NRGBA images and to 1 for
// YCbCr and Gray images, and Planes to 3 for YCbCr and 1 otherwise.
type Descriptor struct {
	Width      int         // width in pixels
	Height     int         // height in pixels
	Ratio      ChromaRatio // chroma ratio
	Pack       int         // pixels per pack
	Interlaced bool        // progressive or interlaced
	Planes     int         // number of planes
}

// Check validates the descriptor and returns a non-nil error when it is
// invalid: Pack must be within [1, 4], and for interlaced content a plane
// height that is odd and differs from the image height is rejected.
func (d *Descriptor) Check() error {
	if d.Pack < 1 || d.Pack > 4 {
		return fmt.Errorf("invalid pack value %v", d.Pack)
	}
	for i := 0; i < d.Planes; i++ {
		h := d.GetHeight(i)
		if d.Interlaced && h%2 != 0 && h != d.Height {
			return fmt.Errorf("invalid interlaced input height %v", d.Height)
		}
	}
	return nil
}

// GetWidth returns the width in pixels for the input plane.
// Plane 0 always has the full image width; the other planes are scaled down
// according to the chroma ratio, rounding up.
// It panics if plane is outside [0, maxPlanes) or if Ratio is not one of the
// declared ChromaRatio constants.
func (d *Descriptor) GetWidth(plane int) int {
	if plane < 0 || plane+1 > maxPlanes {
		panic(fmt.Errorf("invalid plane %v", plane))
	}
	if plane == 0 {
		return d.Width
	}
	switch d.Ratio {
	case Ratio410, Ratio411:
		// quarter width, rounded up
		return (d.Width + 3) >> 2
	case Ratio420, Ratio422:
		// half width, rounded up
		return (d.Width + 1) >> 1
	case Ratio440, Ratio444:
		return d.Width
	}
	panic(fmt.Errorf("invalid ratio %v", d.Ratio))
}

// GetHeight returns the height in pixels for the input plane.
// Plane 0 always has the full image height; for the other planes the ratios
// 4:1:0, 4:2:0 and 4:4:0 halve the height (rounding up), and for interlaced
// content the halved height is additionally rounded up to an even value.
// It panics if plane is outside [0, maxPlanes) or if Ratio is not one of the
// declared ChromaRatio constants.
func (d *Descriptor) GetHeight(plane int) int {
	if plane < 0 || plane+1 > maxPlanes {
		panic(fmt.Errorf("invalid plane %v", plane))
	}
	if plane == 0 {
		return d.Height
	}
	switch d.Ratio {
	case Ratio411, Ratio422, Ratio444:
		return d.Height
	case Ratio410, Ratio420, Ratio440:
		h := (d.Height + 1) >> 1
		if d.Interlaced && h&1 != 0 {
			h++
		}
		return h
	}
	panic(fmt.Errorf("invalid ratio %v", d.Ratio))
}

// ConverterConfig is a configuration used with NewConverter
type ConverterConfig struct {
	Input      Descriptor // input description
	Output     Descriptor // output description
	Threads    int        // number of allowed "threads"
	DisableAsm bool       // disable asm optimisations
}

const (
	// maxPlanes is the largest number of planes an image can have here
	// (the three planes of a YCbCr image).
	maxPlanes = 3
)

// Plane describes a single image plane
type Plane struct {
	Data   []byte // plane buffer
	Width  int    // width in pixels
	Height int    // height in pixels
	Pitch  int    // pitch in bytes
	Pack   int    // pixels per pack
}

// converterContext is the Converter returned by NewConverter. For each plane
// it keeps an optional horizontal resizer (wrez), an optional vertical resizer
// (hrez) and, when both are needed, an intermediate buffer plane.
type converterContext struct {
	ConverterConfig
	wrez   [maxPlanes]Resizer
	hrez   [maxPlanes]Resizer
	buffer [maxPlanes]*Plane
}

// toInterlacedString names the scan type for use in error messages.
func toInterlacedString(interlaced bool) string {
	if interlaced {
		return "interlaced"
	}
	return "progressive"
}

// toPackedString formats a pack value for use in error messages.
func toPackedString(pack int) string {
	return fmt.Sprintf("%v-packed", pack)
}

// align rounds value up to a multiple of align. The bit trick is only correct
// when align is a power of two; the only call in this file passes 16.
func align(value, align int) int {
	return (value + align - 1) & -align
}

// checkConversion reports whether src can be converted into dst: both
// descriptors must pass Check and must agree on interlacing, pack and number
// of planes. Sizes and chroma ratios are allowed to differ.
func checkConversion(dst, src *Descriptor) error {
	if err := src.Check(); err != nil {
		return fmt.Errorf("invalid input format: %v", err)
	}
	if err := dst.Check(); err != nil {
		return fmt.Errorf("invalid output format: %v", err)
	}
	if src.Interlaced != dst.Interlaced {
		return fmt.Errorf("unable to convert %v input to %v output",
			toInterlacedString(src.Interlaced),
			toInterlacedString(dst.Interlaced))
	}
	if src.Pack != dst.Pack {
		return fmt.Errorf("unable to convert %v input to %v output",
			toPackedString(src.Pack),
			toPackedString(dst.Pack))
	}
	if src.Planes != dst.Planes {
		return fmt.Errorf("unable to convert %v planes to %v planes",
			src.Planes, dst.Planes)
	}
	return nil
}

// min returns the smaller of a and b.
func min(a, b int) int {
	if a < b {
		return a
	}
	return b
}

// NewConverter returns a Converter interface
// cfg = converter configuration
// filter = filter used for resizing
// Returns an error if the conversion is invalid or not implemented
//
// Every plane must be at least 2x2 on both the input and the output side.
// When cfg.Threads is 0 it is overwritten, in the caller's struct, with
// runtime.GOMAXPROCS(0).
func NewConverter(cfg *ConverterConfig, filter Filter) (Converter, error) {
	err := checkConversion(&cfg.Output, &cfg.Input)
	if err != nil {
		return nil, err
	}
	if cfg.Threads == 0 {
		cfg.Threads = runtime.GOMAXPROCS(0)
	}
	ctx := &converterContext{
		ConverterConfig: *cfg,
	}
	size := 0
	group := sync.WaitGroup{}
	for i := 0; i < cfg.Output.Planes; i++ {
		win := cfg.Input.GetWidth(i)
		hin := cfg.Input.GetHeight(i)
		wout := cfg.Output.GetWidth(i)
		hout := cfg.Output.GetHeight(i)
		// NOTE(review): these early returns do not call group.Wait(), so work
		// dispatched for earlier planes may still be running — confirm that
		// this is acceptable for dispatch (defined elsewhere).
		if win < 2 || hin < 2 {
			return nil, fmt.Errorf("input size too small %vx%v", win, hin)
		}
		if wout < 2 || hout < 2 {
			return nil, fmt.Errorf("output size too small %vx%v", wout, hout)
		}
		idx := i
		if win != wout {
			// horizontal resizer, only built when the widths differ
			dispatch(&group, cfg.Threads, func() {
				threads := min(cfg.Threads, hout)
				ctx.wrez[idx] = NewResize(&ResizerConfig{
					Depth:      8,
					Input:      win,
					Output:     wout,
					Vertical:   false,
					Interlaced: false,
					Pack:       cfg.Input.Pack,
					Threads:    threads,
					DisableAsm: cfg.DisableAsm || wout < 16,
				}, filter)
			})
		}
		if hin != hout {
			// vertical resizer, only built when the heights differ
			dispatch(&group, cfg.Threads, func() {
				threads := min(cfg.Threads, hout)
				if cfg.Output.Interlaced {
					threads = min(cfg.Threads, hout>>1)
				}
				ctx.hrez[idx] = NewResize(&ResizerConfig{
					Depth:      8,
					Input:      hin,
					Output:     hout,
					Vertical:   true,
					Interlaced: cfg.Output.Interlaced,
					Pack:       cfg.Output.Pack,
					Threads:    threads,
					DisableAsm: cfg.DisableAsm || wout < 16 || win < 16,
				}, filter)
			})
		}
		if win != wout && hin != hout {
			// Both passes are needed: describe an intermediate plane with the
			// input width and the output height (resizePlane runs the vertical
			// pass into it, then the horizontal pass out of it).
			p := &Plane{
				Width:  win,
				Height: hout,
				Pitch:  align(win*cfg.Input.Pack, 16),
				Pack:   cfg.Input.Pack,
			}
			size += p.Pitch * p.Height
			ctx.buffer[i] = p
		}
	}
	if size != 0 {
		// One allocation backs all intermediate planes; each plane gets a
		// slice that ends at the last pixel of its last row.
		buffer := make([]byte, size)
		idx := 0
		for i := 0; i < cfg.Output.Planes; i++ {
			if p := ctx.buffer[i]; p != nil {
				size := p.Pitch*(p.Height-1) + p.Width*p.Pack
				p.Data = buffer[idx : idx+size]
				idx += p.Pitch * p.Height
			}
		}
	}
	group.Wait()
	return ctx, nil
}

// GetRatio returns a ChromaRatio from an image.YCbCrSubsampleRatio.
// Values without a matching case fall back to Ratio444.
func GetRatio(value image.YCbCrSubsampleRatio) ChromaRatio {
	switch value {
	case image.YCbCrSubsampleRatio410:
		return Ratio410
	case image.YCbCrSubsampleRatio411:
		return Ratio411
	case image.YCbCrSubsampleRatio420:
		return Ratio420
	case image.YCbCrSubsampleRatio422:
		return Ratio422
	case image.YCbCrSubsampleRatio440:
		return Ratio440
	case image.YCbCrSubsampleRatio444:
		return Ratio444
	}
	return Ratio444
}

// inspect builds the descriptor and the plane list of an image.
// Only *image.YCbCr, *image.RGBA, *image.NRGBA and *image.Gray are handled;
// any other type yields an "unknown image format" error.
func inspect(data image.Image, interlaced bool) (*Descriptor, []Plane, error) {
	switch t := data.(type) {
	case *image.YCbCr:
		d, p := inspectYuv(t, interlaced)
		return d, p, nil
	case *image.RGBA:
		d, p := inspectRgba(t, interlaced)
		return d, p, nil
	case *image.NRGBA:
		d, p := inspectNrgba(t, interlaced)
		return d, p, nil
	case *image.Gray:
		d, p := inspectGray(t, interlaced)
		return d, p, nil
	}
	return nil, nil, fmt.Errorf("unknown image format")
}

// getYuvDescriptor describes a YCbCr image: three planes, pack of 1, chroma
// ratio taken from the image.
func getYuvDescriptor(img *image.YCbCr, interlaced bool) Descriptor {
	return Descriptor{
		Width:      img.Rect.Dx(),
		Height:     img.Rect.Dy(),
		Ratio:      GetRatio(img.SubsampleRatio),
		Interlaced: interlaced,
		Pack:       1,
		Planes:     3,
	}
}

// getRgbDescriptor describes an RGBA or NRGBA image of the given bounds: a
// single plane with a pack of 4.
func getRgbDescriptor(rect image.Rectangle, interlaced bool) Descriptor {
	return Descriptor{
		Width:      rect.Dx(),
		Height:     rect.Dy(),
		Ratio:      Ratio444,
		Interlaced: interlaced,
		Pack:       4,
		Planes:     1,
	}
}

// getGrayDescriptor describes a Gray image: a single plane with a pack of 1.
func getGrayDescriptor(img *image.Gray, interlaced bool) Descriptor {
	return Descriptor{
		Width:      img.Rect.Dx(),
		Height:     img.Rect.Dy(),
		Ratio:      Ratio444,
		Interlaced: interlaced,
		Pack:       1,
		Planes:     1,
	}
}

// setPlane points p.Data at the part of pix that holds the plane: it starts at
// the offset of rect's top-left corner and stops after the last pixel of the
// last row. p.Pitch, p.Width, p.Height and p.Pack must already be set.
func setPlane(p *Plane, rect image.Rectangle, offset func(x, y int) int, pix []byte) {
	x, y := rect.Min.X, rect.Min.Y
	base := offset(x, y)
	p.Data = pix[base : base+p.Pitch*(p.Height-1)+p.Width*p.Pack]
}

// getYuvPlanes returns the Y, Cb and Cr planes of img, in that order, sized
// according to d.
func getYuvPlanes(img *image.YCbCr, d *Descriptor) []Plane {
	planes := []Plane{}
	for i := 0; i < maxPlanes; i++ {
		p := Plane{
			Width:  d.GetWidth(i),
			Height: d.GetHeight(i),
			Pack:   d.Pack,
		}
		switch i {
		case 0:
			p.Pitch = img.YStride
			setPlane(&p, img.Rect, img.YOffset, img.Y)
		case 1:
			p.Pitch = img.CStride
			setPlane(&p, img.Rect, img.COffset, img.Cb)
		case 2:
			p.Pitch = img.CStride
			setPlane(&p, img.Rect, img.COffset, img.Cr)
		}
		planes = append(planes, p)
	}
	return planes
}

// getSinglePlane returns the one-element plane list of a single-plane image
// described by d, whose pixels live in pix with the given pitch.
func getSinglePlane(d *Descriptor, pitch int, rect image.Rectangle, offset func(x, y int) int, pix []byte) []Plane {
	p := Plane{
		Width:  d.Width,
		Height: d.Height,
		Pack:   d.Pack,
		Pitch:  pitch,
	}
	setPlane(&p, rect, offset, pix)
	return []Plane{p}
}

// getRgbaPlane returns the single plane of an RGBA image.
func getRgbaPlane(img *image.RGBA, d *Descriptor) []Plane {
	return getSinglePlane(d, img.Stride, img.Rect, img.PixOffset, img.Pix)
}

// getNrgbaPlane returns the single plane of an NRGBA image.
func getNrgbaPlane(img *image.NRGBA, d *Descriptor) []Plane {
	return getSinglePlane(d, img.Stride, img.Rect, img.PixOffset, img.Pix)
}

// getGrayPlane returns the single plane of a Gray image.
func getGrayPlane(img *image.Gray, d *Descriptor) []Plane {
	return getSinglePlane(d, img.Stride, img.Rect, img.PixOffset, img.Pix)
}

// inspectYuv returns the descriptor and planes of a YCbCr image.
func inspectYuv(img *image.YCbCr, interlaced bool) (*Descriptor, []Plane) {
	d := getYuvDescriptor(img, interlaced)
	return &d, getYuvPlanes(img, &d)
}

// inspectRgba returns the descriptor and plane of an RGBA image.
func inspectRgba(img *image.RGBA, interlaced bool) (*Descriptor, []Plane) {
	d := getRgbDescriptor(img.Rect, interlaced)
	return &d, getRgbaPlane(img, &d)
}

// inspectNrgba returns the descriptor and plane of an NRGBA image.
func inspectNrgba(img *image.NRGBA, interlaced bool) (*Descriptor, []Plane) {
	d := getRgbDescriptor(img.Rect, interlaced)
	return &d, getNrgbaPlane(img, &d)
}

// inspectGray returns the descriptor and plane of a Gray image.
func inspectGray(img *image.Gray, interlaced bool) (*Descriptor, []Plane) {
	d := getGrayDescriptor(img, interlaced)
	return &d, getGrayPlane(img, &d)
}

// resizePlane schedules the conversion of one plane through dispatch.
// With both resizers present the vertical pass writes into buf and the
// horizontal pass reads buf and writes dst; with a single resizer it goes
// straight from src to dst; with none the plane is copied unchanged.
// NOTE(review): dispatch is defined elsewhere; presumably it runs the closure
// concurrently and registers it on group — confirm.
func resizePlane(group *sync.WaitGroup, threads int, dst, src, buf *Plane, hrez, wrez Resizer) {
	dispatch(group, threads, func() {
		hdst := dst
		wsrc := src
		if hrez != nil && wrez != nil {
			hdst = buf
			wsrc = buf
		}
		if hrez != nil {
			hrez.Resize(hdst.Data, src.Data, src.Width, src.Height, hdst.Pitch, src.Pitch)
		}
		if wrez != nil {
			wrez.Resize(dst.Data, wsrc.Data, wsrc.Width, wsrc.Height, dst.Pitch, wsrc.Pitch)
		}
		if hrez == nil && wrez == nil {
			copyPlane(dst.Data, src.Data, src.Width*src.Pack, src.Height, dst.Pitch, src.Pitch)
		}
	})
}

// Convert implements Converter. It inspects both images, checks that they are
// compatible with each other, then resizes every plane and waits for all of
// them to finish.
// NOTE(review): the sizes of the supplied images are not compared with the
// sizes the converter was configured for; callers presumably have to pass
// images matching the configuration — confirm.
func (ctx *converterContext) Convert(output, input image.Image) error {
	id, src, err := inspect(input, ctx.Input.Interlaced)
	if err != nil {
		return err
	}
	od, dst, err := inspect(output, ctx.Output.Interlaced)
	if err != nil {
		return err
	}
	err = checkConversion(od, id)
	if err != nil {
		return err
	}
	group := sync.WaitGroup{}
	for i := 0; i < ctx.Input.Planes; i++ {
		resizePlane(&group, ctx.Threads, &dst[i], &src[i], ctx.buffer[i], ctx.hrez[i], ctx.wrez[i])
	}
	group.Wait()
	return nil
}

// PrepareConversion returns a ConverterConfig properly set for a conversion
// from input images to output images
// Returns an error if the conversion is not possible
//
// Both images are inspected as progressive (not interlaced). Threads and
// DisableAsm are left at their zero values in the returned configuration.
func PrepareConversion(output, input image.Image) (*ConverterConfig, error) {
	src, _, err := inspect(input, false)
	if err != nil {
		return nil, err
	}
	dst, _, err := inspect(output, false)
	if err != nil {
		return nil, err
	}
	err = checkConversion(dst, src)
	if err != nil {
		return nil, err
	}
	return &ConverterConfig{
		Input:  *src,
		Output: *dst,
	}, nil
}

// Convert converts an input image into output, applying any color conversion
// and/or resizing, using the input filter for interpolation.
+// Note that if you plan to do the same conversion over and over, it is faster +// to use a Converter interface +func Convert(output, input image.Image, filter Filter) error { + cfg, err := PrepareConversion(output, input) + if err != nil { + return err + } + converter, err := NewConverter(cfg, filter) + if err != nil { + return err + } + return converter.Convert(output, input) +} + +// Psnr computes the PSNR between two input images +// Only ycbcr is currently supported +func Psnr(a, b image.Image) ([]float64, error) { + psnrs := []float64{} + id, src, err := inspect(a, false) + if err != nil { + return nil, err + } + od, dst, err := inspect(b, false) + if err != nil { + return nil, err + } + if *id != *od { + return nil, fmt.Errorf("unable to psnr different formats") + } + for i := 0; i < len(dst); i++ { + psnrs = append(psnrs, psnrPlane(src[i].Data, dst[i].Data, src[i].Width*src[i].Pack, src[i].Height, src[i].Pitch, dst[i].Pitch)) + } + return psnrs, nil +} diff --git a/vendor/github.com/bamiaux/rez/kernels.go b/vendor/github.com/bamiaux/rez/kernels.go new file mode 100644 index 00000000..8d2c7ba5 --- /dev/null +++ b/vendor/github.com/bamiaux/rez/kernels.go @@ -0,0 +1,226 @@ +// Copyright 2013 Benoît Amiaux. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. 

package rez

import (
	"math"
	"sort"
)

// kernel holds the fixed-point filter data produced by makeKernel: the
// coefficients, the source offsets, the tap count (stored in size) and the
// coefficient replication factor returned by prepareCoeffs.
type kernel struct {
	coeffs   []int16
	offsets  []int16
	size     int
	cofscale int // how many more coeffs do we have
}

// bin converts a bool to 1 (true) or 0 (false).
func bin(v bool) uint {
	if v {
		return 1
	}
	return 0
}

// clip clamps v to the range [min, max].
func clip(v, min, max int) int {
	if v < min {
		return min
	}
	if v > max {
		return max
	}
	return v
}

// max returns the larger of a and b.
func max(a, b int) int {
	if a > b {
		return a
	}
	return b
}

// makeDoubleKernel computes the floating-point filter kernel for one resize
// direction.
// field is 1 for interlaced resizes and 0 otherwise (see makeKernel); idx
// selects the field being processed.
// It returns, in order: the first source position of every output pixel, the
// sum of the weights of every output pixel, the weights themselves (taps per
// output pixel), the number of taps and the number of output pixels computed.
func makeDoubleKernel(cfg *ResizerConfig, filter Filter, field, idx uint) ([]int16, []float64, []float64, int, int) {
	scale := float64(cfg.Output) / float64(cfg.Input)
	// when downscaling (scale < 1) the filter support is widened by 1/scale
	step := math.Min(1, scale)
	support := float64(filter.Taps()) / step
	taps := int(math.Ceil(support)) * 2
	// horizontal 6-tap kernels are widened to 8 taps when the asm path is used
	if !cfg.Vertical && taps == 6 && hasAsm() && !cfg.DisableAsm {
		taps = 8
	}
	// never use more taps than the (per-field) input size, rounded down to even
	taps = min(taps, (cfg.Input>>field)&^1)
	offsets := make([]int16, cfg.Output)
	sums := make([]float64, cfg.Output)
	weights := make([]float64, cfg.Output*taps)
	xmid := float64(cfg.Input-cfg.Output) / float64(cfg.Output*2)
	xstep := 1 / scale
	// interlaced resize see only one field but still use full res pixel positions
	ftaps := taps << field
	size := (cfg.Output + int(field*(1-idx))) >> field
	step /= float64(1 + field)
	xmid += xstep * float64(field*idx)
	for i := 0; i < size; i++ {
		left := int(math.Ceil(xmid)) - ftaps>>1
		// keep the tap window inside the input
		x := clip(left, 0, max(0, cfg.Input-ftaps))
		offsets[i] = int16(x)
		for j := 0; j < ftaps; j++ {
			src := left + j
			// interlaced: skip source lines that belong to the other field
			if field != 0 && idx^uint(src&1) != 0 {
				continue
			}
			weight := filter.Get(math.Abs(xmid-float64(src)) * step)
			// positions outside the window are clamped, so their weight is
			// added to the nearest tap that is inside it
			src = clip(src, x, cfg.Input-1) - x
			src >>= field
			weights[i*taps+src] += weight
			sums[i] += weight
		}
		xmid += xstep * float64(1+field)
	}
	return offsets, sums, weights, taps, size
}

// weight pairs a filter weight with the index of the tap it belongs to.
type weight struct {
	weight float64
	offset int
}

// weights implements sort.Interface, ordering by decreasing absolute weight.
type weights []weight

// Len implements sort.Interface.
func (w weights) Len() int {
	return len(w)
}

// Less implements sort.Interface: element i sorts before element j when its
// absolute weight is larger.
func (w weights) Less(i, j int) bool {
	return math.Abs(w[j].weight) < math.Abs(w[i].weight)
}

+func (w weights) Swap(i, j int) { + w[i], w[j] = w[j], w[i] +} + +func makeIntegerKernel(taps, size int, cof, sums []float64, pos []int16, field, idx uint) ([]int16, []int16) { + coeffs := make([]int16, taps*size) + offsets := make([]int16, size) + weights := make(weights, taps) + for i, sum := range sums[:size] { + for j, w := range cof[:taps] { + weights[j].weight = w + weights[j].offset = j + } + sort.Sort(weights) + diff := float64(0) + scale := 1 << Bits / sum + for _, it := range weights { + w := it.weight*scale + diff + iw := math.Floor(w + 0.5) + coeffs[i*taps+it.offset] = int16(iw) + diff = w - iw + } + cof = cof[taps:] + off := pos[i] + int16(field-idx) + offsets[i] = off >> field + } + return coeffs, offsets +} + +func makeKernel(cfg *ResizerConfig, filter Filter, idx uint) kernel { + field := bin(cfg.Interlaced) + pos, sums, cof, taps, size := makeDoubleKernel(cfg, filter, field, idx) + coeffs, offsets := makeIntegerKernel(taps, size, cof, sums, pos, field, idx) + //coeffs, offsets = reduceKernel(coeffs, offsets, taps, size) + if cfg.Vertical { + for i := len(offsets) - 1; i > 0; i-- { + offsets[i] = offsets[i] - offsets[i-1] + } + + } else if cfg.Pack > 1 { + coeffs, offsets, taps = unpack(coeffs, offsets, taps, cfg.Pack) + } + coeffs, cofscale := prepareCoeffs(cfg, coeffs, size, taps) + return kernel{coeffs, offsets, taps, cofscale} +} + +func prepareCoeffs(cfg *ResizerConfig, cof []int16, size, taps int) ([]int16, int) { + if !hasAsm() || cfg.DisableAsm { + return cof, 1 + } + if cfg.Vertical { + return prepareVerticalCoeffs(cof, size, taps) + } + return prepareHorizontalCoeffs(cof, size*cfg.Pack, taps), 1 +} + +func prepareVerticalCoeffs(cof []int16, size, taps int) ([]int16, int) { + xwidth := 16 + dst := make([]int16, size*taps*xwidth>>1) + si := 0 + di := 0 + for i := 0; i < size; i++ { + for j := 0; j < taps; j += 2 { + for k := 0; k < xwidth; k += 2 { + dst[di+k+0] = cof[si+0] + dst[di+k+1] = cof[si+1] + } + si += 2 + di += xwidth + } + } + 
return dst, xwidth >> 1 +} + +func prepareHorizontalCoeffs(cof []int16, size, taps int) []int16 { + if taps == 2 || taps == 4 || taps == 8 { + return cof + } + xwidth := 16 + dst := make([]int16, len(cof)) + loop := size / xwidth + left := (size - loop*xwidth) * taps + si := 0 + di := 0 + // instead of having all taps contiguous for one destination pixel, + // we store 2 taps per pixel and fill one simd-sized buffer with it, then + // fill the second register with the following taps until none are left + // this way we don't care about the simd register size, we will always be + // able to process N pixels at once + for i := 0; i < loop; i++ { + for j := 0; j*2 < taps; j++ { + for k := 0; k < xwidth; k++ { + dst[di+k*2+0] = cof[si+k*taps+0] + dst[di+k*2+1] = cof[si+k*taps+1] + } + di += xwidth * 2 + si += 2 + } + si = di + } + copy(dst[di:di+left], cof[si:si+left]) + return dst +} + +func unpack(coeffs, offsets []int16, taps, pack int) ([]int16, []int16, int) { + cof := make([]int16, len(coeffs)*pack*pack) + off := make([]int16, len(offsets)*pack) + di := 0 + ci := 0 + oi := 0 + buf := make([]int16, pack*taps*2) + zero := buf[:pack*taps] + next := buf[pack*taps:] + for _, offset := range offsets { + copy(next, zero) + for i := 0; i < taps; i++ { + next[i*pack] = coeffs[ci+i] + } + for i := 0; i < pack; i++ { + off[oi+i] = offset * int16(pack) + copy(cof[di+pack*taps*i:], next) + copy(next[i+1:], next[i:]) + copy(next[:i+1], zero) + } + di += taps * pack * pack + ci += taps + oi += pack + } + return cof, off, taps * pack +} diff --git a/vendor/github.com/bamiaux/rez/mkscalers.sh b/vendor/github.com/bamiaux/rez/mkscalers.sh new file mode 100644 index 00000000..9d18c7cc --- /dev/null +++ b/vendor/github.com/bamiaux/rez/mkscalers.sh @@ -0,0 +1,7 @@ +#!bin/sh +# depends on https://github.com/jimmyfrasche/txt +cat fixedscalers.go.input | txt -json fixedscalers.go.template > fixedscalers.go +go fmt fixedscalers.go +go install -v .../rez/rezgen +rezgen -gen horizontal > 
hscalers_amd64.s && echo hscalers_amd64.s
+rezgen -gen vertical > vscalers_amd64.s && echo vscalers_amd64.s
diff --git a/vendor/github.com/bamiaux/rez/resize.go b/vendor/github.com/bamiaux/rez/resize.go
new file mode 100644
index 00000000..9b8d8bb4
--- /dev/null
+++ b/vendor/github.com/bamiaux/rez/resize.go
@@ -0,0 +1,184 @@
+// Copyright 2013 Benoît Amiaux. All rights reserved.
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file.
+
+package rez
+
+import (
+	"sync"
+)
+
+// ResizerConfig is a configuration used with NewResize
+type ResizerConfig struct {
+	Depth      int  // bits per pixel
+	Input      int  // input size in pixels
+	Output     int  // output size in pixels
+	Vertical   bool // true for vertical resizes
+	Interlaced bool // true if input/output is interlaced
+	Pack       int  // pixels per pack [default=1]
+	Threads    int  // number of threads, [default=0]
+	DisableAsm bool // disable asm optimisations
+}
+
+// Resizer is an interface that implements resizes
+type Resizer interface {
+	// Resize one plane into another
+	// dst, src = destination and source buffer
+	// width, height = plane dimensions in pixels
+	// dstPitch, srcPitch = destination and source pitchs/strides in bytes
+	Resize(dst, src []byte, width, height, dstPitch, srcPitch int)
+}
+
+// scaler is the signature shared by all scaling routines (Go and assembly):
+// cof and off are the kernel coefficients and offsets, taps the number of
+// coefficients per output sample.
+type scaler func(dst, src []byte, cof, off []int16,
+	taps, width, height, dstPitch, srcPitch int)
+
+// context is the Resizer implementation returned by NewResize.
+type context struct {
+	cfg     ResizerConfig
+	kernels []kernel // one kernel, or two (one per field) for vertical interlaced resizes
+	scaler  scaler
+}
+
+// getHorizontalScalerGo returns the pure-Go horizontal scaler for the given
+// tap count, falling back to the generic h8scaleNGo.
+func getHorizontalScalerGo(taps int) scaler {
+	switch taps {
+	case 2:
+		return h8scale2Go
+	case 4:
+		return h8scale4Go
+	case 6:
+		return h8scale6Go
+	case 8:
+		return h8scale8Go
+	case 10:
+		return h8scale10Go
+	case 12:
+		return h8scale12Go
+	}
+	return h8scaleNGo
+}
+
+// getVerticalScalerGo returns the pure-Go vertical scaler for the given tap
+// count, falling back to the generic v8scaleNGo.
+func getVerticalScalerGo(taps int) scaler {
+	switch taps {
+	case 2:
+		return v8scale2Go
+	case 4:
+		return v8scale4Go
+	case 6:
+		return v8scale6Go
+	case 8:
+		return v8scale8Go
+	case 10:
+		return v8scale10Go
+	case 12:
+		return v8scale12Go
+	}
+	return v8scaleNGo
+}
+
+// NewResize returns a new resizer
+// cfg = resize configuration
+// filter = filter used for computing weights
+//
+// cfg is copied; in the copy Depth is forced to 8 and a Pack below 1 becomes
+// 1. The scaler is selected from the tap count of the first kernel. Vertical
+// interlaced configurations get a second kernel for the second field.
+func NewResize(cfg *ResizerConfig, filter Filter) Resizer {
+	ctx := context{
+		cfg: *cfg,
+	}
+	ctx.cfg.Depth = 8 // only 8-bit for now
+	if ctx.cfg.Pack < 1 {
+		ctx.cfg.Pack = 1
+	}
+	ctx.kernels = []kernel{makeKernel(&ctx.cfg, filter, 0)}
+	ctx.scaler = getHorizontalScaler(ctx.kernels[0].size, !cfg.DisableAsm)
+	if cfg.Vertical {
+		ctx.scaler = getVerticalScaler(ctx.kernels[0].size, !cfg.DisableAsm)
+		if cfg.Interlaced {
+			ctx.kernels = append(ctx.kernels, makeKernel(&ctx.cfg, filter, 1))
+		}
+	}
+	return &ctx
+}
+
+// dispatch runs job synchronously when threads is 1; otherwise it registers
+// the job with group and runs it in a new goroutine.
+func dispatch(group *sync.WaitGroup, threads int, job func()) {
+	if threads == 1 {
+		job()
+	} else {
+		group.Add(1)
+		go func() {
+			job()
+			group.Done()
+		}()
+	}
+}
+
+// scaleSlice dispatches one call of scaler on a band of rows.
+func scaleSlice(group *sync.WaitGroup, threads int, scaler scaler,
+	dst, src []byte, cof, off []int16, taps, width, height, dp, sp int) {
+	dispatch(group, threads, func() {
+		scaler(dst, src, cof, off, taps, width, height, dp, sp)
+	})
+}
+
+// scaleSlices splits a plane of height rows into bands and hands every band
+// to scaleSlice.
+//
+// At most threads bands are created, never more than there are rows and
+// always at least one, so a thread count of zero or one larger than height
+// neither divides by zero nor addresses rows beyond the plane. The last band
+// takes the rows left over by the integer division.
+//
+// For vertical resizes each band gets its own window of cof and off (one
+// offset and taps*cofscale coefficients per output row) and the source
+// position advances by the sum of the band's offsets, which are row deltas.
+// For horizontal resizes all bands share cof and off, and the source advances
+// by as many rows as the destination.
+func scaleSlices(group *sync.WaitGroup, scaler scaler,
+	vertical bool, threads, taps, width, height, dp, sp int,
+	dst, src []byte, cof []int16, cofscale int, off []int16) {
+	dispatch(group, threads, func() {
+		// Number of bands: threads clamped to [1, height].
+		slices := threads
+		if slices > height {
+			slices = height
+		}
+		if slices < 1 {
+			slices = 1
+		}
+		nh := height / slices
+		if nh < 1 {
+			nh = 1
+		}
+		di := 0
+		si := 0
+		oi := 0
+		ci := 0
+		for i := 0; i < slices; i++ {
+			last := i+1 == slices
+			ih := nh
+			if last {
+				ih = height - nh*(slices-1)
+			}
+			if ih == 0 {
+				continue
+			}
+			next := width
+			if vertical {
+				next = ih
+			}
+			scaleSlice(group, threads, scaler,
+				dst[di:di+dp*(ih-1)+width],
+				src[si:],
+				cof[ci:ci+next*taps*cofscale],
+				off[oi:oi+next],
+				taps, width, ih, dp, sp)
+			if last {
+				break
+			}
+			di += ih * dp
+			if vertical {
+				ci += ih * taps * cofscale
+				for j := 0; j < ih; j++ {
+					si += sp * int(off[oi+j])
+				}
+				oi += ih
+			} else {
+				si += sp * ih
+			}
+		}
+	})
+}
+
+// Resize scales one plane with the kernel(s) built by NewResize. The output
+// width is cfg.Output for horizontal resizes and width for vertical ones; the
+// output height is height for horizontal resizes and derived from cfg.Output
+// (per field when interlaced) for vertical ones.
+//
+// NOTE(review): in this copy of the patch the text between "dp<" and
+// "0xFF {" below is missing. It covered the end of this function and the
+// start of what appears to be the byte-clamping helper (presumably u8);
+// restore it from the upstream sources before using this hunk.
+func (c *context) Resize(dst, src []byte, width, height, dp, sp int) {
+	field := bin(c.cfg.Vertical && c.cfg.Interlaced)
+	dwidth := c.cfg.Output
+	dheight := height
+	if c.cfg.Vertical {
+		dwidth = width
+	}
+	pk := c.cfg.Pack
+	group := sync.WaitGroup{}
+	for i, k := range c.kernels[:1+field] {
+		if c.cfg.Vertical {
+			dheight = (c.cfg.Output + (1-i)*int(field)) >> field
+		}
+		scaleSlices(&group, c.scaler, c.cfg.Vertical, c.cfg.Threads,
+			k.size, dwidth*pk, dheight, dp< 0xFF {
+		x = 0xFF
+	}
+	return byte(x)
+}
+
+// copyPlane copies height rows of width bytes from src to dst; dp and sp are
+// the distances in bytes between consecutive rows of dst and src.
+func copyPlane(dst, src []byte, width, height, dp, sp int) {
+	di := 0
+	si := 0
+	for y := 0; y < height; y++ {
+		copy(dst[di:di+width], src[si:si+width])
+		di += dp
+		si += sp
+	}
+}
+
+// psnrPlane returns 10*log10(255*255/MSE) for two planes of width x height
+// bytes, where MSE is the mean of the squared byte differences. Identical
+// planes give an MSE of zero and therefore +Inf.
+func psnrPlane(dst, src []byte, width, height, dp, sp int) float64 {
+	// mse first accumulates the sum of squared differences.
+	mse := 0
+	di := 0
+	si := 0
+	for y := 0; y < height; y++ {
+		for x, v := range src[si : si+width] {
+			n := int(v) - int(dst[di+x])
+			mse += n * n
+		}
+		di += dp
+		si += sp
+	}
+	fmse := float64(mse) / float64(width*height)
+	return 10 * math.Log10(255*255/fmse)
+}
+
+// h8scaleNGo is the generic pure-Go horizontal scaler. For every output pixel
+// it multiplies taps source bytes, starting at that pixel's offset, by the
+// pixel's coefficients, rounds the sum, shifts it right by Bits and clamps it
+// with u8. The same coefficient table is reused for every row.
+func h8scaleNGo(dst, src []byte, cof, off []int16,
+	taps, width, height, dp, sp int) {
+	di := 0
+	si := 0
+	for y := 0; y < height; y++ {
+		c := cof
+		s := src[si:]
+		d := dst[di:]
+		for x, xoff := range off[:width] {
+			pix := 0
+			for i, v := range s[xoff : xoff+int16(taps)] {
+				pix += int(v) * int(c[i])
+			}
+			// Adding 1<<(Bits-1) rounds to nearest before the shift.
+			d[x] = u8((pix + 1<<(Bits-1)) >> Bits)
+			c = c[taps:]
+		}
+		di += dp
+		si += sp
+	}
+}
+
+// v8scaleNGo is the generic pure-Go vertical scaler. off holds one entry per
+// output row, each the number of source rows to advance from the previous
+// position. Every output pixel is the rounded, shifted and clamped sum of taps
+// vertically adjacent source bytes times the row's coefficients.
+func v8scaleNGo(dst, src []byte, cof, off []int16,
+	taps, width, height, dp, sp int) {
+	di := 0
+	for _, yoff := range off[:height] {
+		src = src[sp*int(yoff):]
+		for x := range dst[di : di+width] {
+			pix := 0
+			for i, c := range cof[:taps] {
+				pix += int(c) * int(src[sp*i+x])
+			}
+			dst[di+x] = u8((pix + 1<<(Bits-1)) >> Bits)
+		}
+		cof = cof[taps:]
+		di += dp
+	}
+}
diff --git a/vendor/github.com/bamiaux/rez/scalers_amd64.go b/vendor/github.com/bamiaux/rez/scalers_amd64.go
new file mode 100644
index 00000000..fc9766e0
--- /dev/null
+++ b/vendor/github.com/bamiaux/rez/scalers_amd64.go
@@ -0,0 +1,61 @@
+// Copyright 2014 Benoît Amiaux. All rights reserved.
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file.
+
+package rez
+
+// hasAsm reports whether assembly scalers are available in this build.
+func hasAsm() bool { return true }
+
+// The scalers below are declared without a body; the v8scale* routines are
+// defined in vscalers_amd64.s and the h8scale* ones presumably in
+// hscalers_amd64.s. Note that there is no 6-tap horizontal routine.
+func h8scale2Amd64(dst, src []byte, cof, off []int16, taps, width, height, dp, sp int)
+func h8scale4Amd64(dst, src []byte, cof, off []int16, taps, width, height, dp, sp int)
+func h8scale8Amd64(dst, src []byte, cof, off []int16, taps, width, height, dp, sp int)
+func h8scale10Amd64(dst, src []byte, cof, off []int16, taps, width, height, dp, sp int)
+func h8scale12Amd64(dst, src []byte, cof, off []int16, taps, width, height, dp, sp int)
+func h8scaleNAmd64(dst, src []byte, cof, off []int16, taps, width, height, dp, sp int)
+func v8scale2Amd64(dst, src []byte, cof, off []int16, taps, width, height, dp, sp int)
+func v8scale4Amd64(dst, src []byte, cof, off []int16, taps, width, height, dp, sp int)
+func v8scale6Amd64(dst, src []byte, cof, off []int16, taps, width, height, dp, sp int)
+func v8scale8Amd64(dst, src []byte, cof, off []int16, taps, width, height, dp, sp int)
+func v8scale10Amd64(dst, src []byte, cof, off []int16, taps, width, height, dp, sp int)
+func v8scale12Amd64(dst, src []byte, cof, off []int16, taps, width, height, dp, sp int)
+func v8scaleNAmd64(dst, src []byte, cof, off []int16, taps, width, height, dp, sp int)
+
+// getHorizontalScaler returns the horizontal scaler for the given tap count:
+// the pure-Go one when asm is false, otherwise the assembly routine, with
+// h8scaleNAmd64 as the fallback for tap counts that have no dedicated case.
+func getHorizontalScaler(taps int, asm bool) scaler {
+	if !asm {
+		return getHorizontalScalerGo(taps)
+	}
+	switch taps {
+	case 2:
+		return h8scale2Amd64
+	case 4:
+		return h8scale4Amd64
+	case 8:
+		return h8scale8Amd64
+	case 10:
+		return h8scale10Amd64
+	case 12:
+		return h8scale12Amd64
+	}
+	return h8scaleNAmd64
+}
+
+// getVerticalScaler returns the vertical scaler for the given tap count: the
+// pure-Go one when asm is false, otherwise the assembly routine, with
+// v8scaleNAmd64 as the fallback.
+func getVerticalScaler(taps int, asm bool) scaler {
+	if !asm {
+		return getVerticalScalerGo(taps)
+	}
+	switch taps {
+	case 2:
+		return v8scale2Amd64
+	case 4:
+		return v8scale4Amd64
+	case 6:
+		return v8scale6Amd64
+	case 8:
+		return v8scale8Amd64
+	case 10:
+		return v8scale10Amd64
+	case 12:
+		return v8scale12Amd64
+	}
+	return v8scaleNAmd64
+}
diff --git a/vendor/github.com/bamiaux/rez/scalers_gen.go b/vendor/github.com/bamiaux/rez/scalers_gen.go
new file mode 100644
index 00000000..e972dac6
--- /dev/null
+++ b/vendor/github.com/bamiaux/rez/scalers_gen.go
@@ -0,0 +1,17 @@
+// Copyright 2014 Benoît Amiaux. All rights reserved.
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file.
+
+// +build !amd64
+
+package rez
+
+// hasAsm reports whether assembly scalers are available in this build.
+func hasAsm() bool { return false }
+
+// getHorizontalScaler always returns the pure-Go scaler in this build; asm is
+// ignored.
+func getHorizontalScaler(taps int, asm bool) scaler {
+	return getHorizontalScalerGo(taps)
+}
+
+// getVerticalScaler always returns the pure-Go scaler in this build; asm is
+// ignored.
+func getVerticalScaler(taps int, asm bool) scaler {
+	return getVerticalScalerGo(taps)
+}
diff --git a/vendor/github.com/bamiaux/rez/utils.go b/vendor/github.com/bamiaux/rez/utils.go
new file mode 100644
index 00000000..5af5ec19
--- /dev/null
+++ b/vendor/github.com/bamiaux/rez/utils.go
@@ -0,0 +1,43 @@
+// Copyright 2014 Benoît Amiaux. All rights reserved.
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file.
+ +package rez + +import ( + "fmt" + "image" + "os" +) + +func dumpPlane(prefix string, p *Plane, idx int) error { + fh, err := os.Create(fmt.Sprintf("%v_%v.raw", prefix, idx)) + if err != nil { + return err + } + defer fh.Close() + si := 0 + for y := 0; y < p.Height; y++ { + _, err = fh.Write(p.Data[si : si+p.Width]) + if err != nil { + return err + } + si += p.Pitch + } + return nil +} + +// DumpImage dumps each img planes to disk using the input prefix +func DumpImage(prefix string, img image.Image) error { + _, src, err := inspect(img, false) + if err != nil { + return err + } + for i, p := range src { + err = dumpPlane(prefix, &p, i) + if err != nil { + return err + } + } + return nil +} diff --git a/vendor/github.com/bamiaux/rez/vscalers_amd64.s b/vendor/github.com/bamiaux/rez/vscalers_amd64.s new file mode 100644 index 00000000..33e71ec3 --- /dev/null +++ b/vendor/github.com/bamiaux/rez/vscalers_amd64.s @@ -0,0 +1,1530 @@ +// Copyright 2014 Benoît Amiaux. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. 
+ +// This file is auto-generated - do not modify + +DATA zero_0<>+0x00(SB)/8, $0x0000000000000000 +DATA zero_0<>+0x08(SB)/8, $0x0000000000000000 +GLOBL zero_0<>(SB), 8, $16 +DATA hbits_1<>+0x00(SB)/8, $0x0000200000002000 +DATA hbits_1<>+0x08(SB)/8, $0x0000200000002000 +GLOBL hbits_1<>(SB), 8, $16 + +TEXT ·v8scale2Amd64(SB),4,$0-136 + MOVQ dp+120(FP), BX + MOVQ width+104(FP), CX + MOVQ CX, DX + SUBQ CX, BX + ANDQ $15, DX + SHRQ $4, CX + MOVQ BX, R11 + MOVQ CX, R12 + MOVQ DX, AX + ORQ AX, AX + JE norollback_0 + SUBQ $16, DX + NEGQ DX +norollback_0: + MOVQ DX, R13 + MOVQ off+72(FP), CX + MOVQ CX, R10 + MOVO zero_0<>(SB), X14 + MOVO hbits_1<>(SB), X13 + MOVQ src+24(FP), SI + MOVQ SI, R9 + MOVQ dst+0(FP), DI + MOVQ cof+48(FP), BP + MOVQ sp+128(FP), BX +yloop_1: + MOVQ R9, SI + MOVQ R10, DX + MOVWQSX (DX), AX + MULQ BX + ADDQ AX, SI + MOVQ SI, R9 + MOVQ R12, CX + ORQ CX, CX + JE nomaxloop_2 +maxloop_3: + MOVOU (BP), X12 + MOVOU (SI), X0 + MOVOU (SI)(BX*1), X3 + MOVO X0, X2 + PUNPCKLBW X3, X0 + PUNPCKHBW X3, X2 + MOVO X0, X1 + MOVO X2, X3 + PUNPCKLBW X14, X0 + PUNPCKHBW X14, X1 + PUNPCKLBW X14, X2 + PUNPCKHBW X14, X3 + PMADDWL X12, X0 + PMADDWL X12, X1 + PMADDWL X12, X2 + PMADDWL X12, X3 + PADDL X13, X0 + PADDL X13, X1 + PADDL X13, X2 + PADDL X13, X3 + PSRAL $14, X0 + PSRAL $14, X1 + PSRAL $14, X2 + PSRAL $14, X3 + PACKSSLW X1, X0 + PACKSSLW X3, X2 + PACKUSWB X2, X0 + MOVOU X0, (DI) + ADDQ $16, SI + ADDQ $16, DI + SUBQ $1, CX + JNE maxloop_3 +nomaxloop_2: + MOVQ R13, CX + SUBQ R13, SI + SUBQ R13, DI + ORQ CX, CX + JE nobackroll_4 + MOVOU (BP), X12 + MOVOU (SI), X0 + MOVOU (SI)(BX*1), X3 + MOVO X0, X2 + PUNPCKLBW X3, X0 + PUNPCKHBW X3, X2 + MOVO X0, X1 + MOVO X2, X3 + PUNPCKLBW X14, X0 + PUNPCKHBW X14, X1 + PUNPCKLBW X14, X2 + PUNPCKHBW X14, X3 + PMADDWL X12, X0 + PMADDWL X12, X1 + PMADDWL X12, X2 + PMADDWL X12, X3 + PADDL X13, X0 + PADDL X13, X1 + PADDL X13, X2 + PADDL X13, X3 + PSRAL $14, X0 + PSRAL $14, X1 + PSRAL $14, X2 + PSRAL $14, X3 + PACKSSLW X1, X0 + PACKSSLW 
X3, X2 + PACKUSWB X2, X0 + MOVOU X0, (DI) + ADDQ $16, SI + ADDQ $16, DI +nobackroll_4: + ADDQ R11, DI + ADDQ $32, BP + ADDQ $2, R10 + SUBQ $1, height+112(FP) + JNE yloop_1 + RET + +TEXT ·v8scale4Amd64(SB),4,$0-136 + MOVQ dp+120(FP), BX + MOVQ width+104(FP), CX + MOVQ CX, DX + SUBQ CX, BX + ANDQ $15, DX + SHRQ $4, CX + MOVQ BX, R11 + MOVQ CX, R12 + MOVQ DX, AX + ORQ AX, AX + JE norollback_5 + SUBQ $16, DX + NEGQ DX +norollback_5: + MOVQ DX, R13 + MOVQ off+72(FP), CX + MOVQ CX, R10 + MOVO zero_0<>(SB), X14 + MOVO hbits_1<>(SB), X13 + MOVQ src+24(FP), SI + MOVQ SI, R9 + MOVQ dst+0(FP), DI + MOVQ cof+48(FP), BP + MOVQ sp+128(FP), BX +yloop_6: + MOVQ R9, SI + MOVQ R10, DX + MOVWQSX (DX), AX + MULQ BX + ADDQ AX, SI + MOVQ SI, R9 + MOVQ R12, CX + ORQ CX, CX + JE nomaxloop_7 +maxloop_8: + MOVOU (SI), X0 + MOVOU (SI)(BX*1), X3 + MOVOU (SI)(BX*2), X4 + MOVOU (BP), X10 + MOVOU 32(BP), X11 + ADDQ BX, SI + MOVOU (SI)(BX*2), X7 + MOVO X0, X2 + MOVO X4, X6 + PUNPCKLBW X3, X0 + PUNPCKLBW X7, X4 + PUNPCKHBW X3, X2 + PUNPCKHBW X7, X6 + MOVO X0, X1 + MOVO X4, X5 + MOVO X2, X3 + MOVO X6, X7 + SUBQ BX, SI + PUNPCKLBW X14, X0 + PUNPCKHBW X14, X1 + PUNPCKLBW X14, X4 + PUNPCKHBW X14, X5 + PUNPCKLBW X14, X2 + PUNPCKHBW X14, X3 + PUNPCKLBW X14, X6 + PUNPCKHBW X14, X7 + PMADDWL X10, X0 + PMADDWL X10, X1 + PMADDWL X11, X4 + PMADDWL X11, X5 + PMADDWL X10, X2 + PMADDWL X10, X3 + PMADDWL X11, X6 + PMADDWL X11, X7 + PADDL X4, X0 + PADDL X5, X1 + PADDL X6, X2 + PADDL X7, X3 + PADDL X13, X0 + PADDL X13, X1 + PADDL X13, X2 + PADDL X13, X3 + PSRAL $14, X0 + PSRAL $14, X1 + PSRAL $14, X2 + PSRAL $14, X3 + PACKSSLW X1, X0 + PACKSSLW X3, X2 + PACKUSWB X2, X0 + MOVOU X0, (DI) + ADDQ $16, SI + ADDQ $16, DI + SUBQ $1, CX + JNE maxloop_8 +nomaxloop_7: + MOVQ R13, CX + SUBQ R13, SI + SUBQ R13, DI + ORQ CX, CX + JE nobackroll_9 + MOVOU (SI), X0 + MOVOU (SI)(BX*1), X3 + MOVOU (SI)(BX*2), X4 + MOVOU (BP), X10 + MOVOU 32(BP), X11 + ADDQ BX, SI + MOVOU (SI)(BX*2), X7 + MOVO X0, X2 + MOVO X4, X6 + PUNPCKLBW X3, X0 
+ PUNPCKLBW X7, X4 + PUNPCKHBW X3, X2 + PUNPCKHBW X7, X6 + MOVO X0, X1 + MOVO X4, X5 + MOVO X2, X3 + MOVO X6, X7 + SUBQ BX, SI + PUNPCKLBW X14, X0 + PUNPCKHBW X14, X1 + PUNPCKLBW X14, X4 + PUNPCKHBW X14, X5 + PUNPCKLBW X14, X2 + PUNPCKHBW X14, X3 + PUNPCKLBW X14, X6 + PUNPCKHBW X14, X7 + PMADDWL X10, X0 + PMADDWL X10, X1 + PMADDWL X11, X4 + PMADDWL X11, X5 + PMADDWL X10, X2 + PMADDWL X10, X3 + PMADDWL X11, X6 + PMADDWL X11, X7 + PADDL X4, X0 + PADDL X5, X1 + PADDL X6, X2 + PADDL X7, X3 + PADDL X13, X0 + PADDL X13, X1 + PADDL X13, X2 + PADDL X13, X3 + PSRAL $14, X0 + PSRAL $14, X1 + PSRAL $14, X2 + PSRAL $14, X3 + PACKSSLW X1, X0 + PACKSSLW X3, X2 + PACKUSWB X2, X0 + MOVOU X0, (DI) + ADDQ $16, SI + ADDQ $16, DI +nobackroll_9: + ADDQ R11, DI + ADDQ $64, BP + ADDQ $2, R10 + SUBQ $1, height+112(FP) + JNE yloop_6 + RET + +TEXT ·v8scale6Amd64(SB),4,$0-136 + MOVQ dp+120(FP), BX + MOVQ width+104(FP), CX + MOVQ CX, DX + SUBQ CX, BX + ANDQ $15, DX + SHRQ $4, CX + MOVQ BX, R11 + MOVQ CX, R12 + MOVQ DX, AX + ORQ AX, AX + JE norollback_10 + SUBQ $16, DX + NEGQ DX +norollback_10: + MOVQ DX, R13 + MOVQ off+72(FP), CX + MOVQ CX, R10 + MOVO zero_0<>(SB), X14 + MOVO hbits_1<>(SB), X13 + MOVQ src+24(FP), SI + MOVQ SI, R9 + MOVQ dst+0(FP), DI + MOVQ cof+48(FP), BP + MOVQ sp+128(FP), BX +yloop_11: + MOVQ R9, SI + MOVQ R10, DX + MOVWQSX (DX), AX + MULQ BX + ADDQ AX, SI + MOVQ SI, R9 + MOVQ R12, CX + ORQ CX, CX + JE nomaxloop_12 +maxloop_13: + LEAQ (SI)(BX*4), AX + MOVOU (SI), X0 + MOVOU (SI)(BX*1), X3 + MOVOU (SI)(BX*2), X4 + MOVOU (BP), X10 + MOVOU 32(BP), X11 + ADDQ BX, SI + MOVOU (SI)(BX*2), X7 + MOVO X0, X2 + MOVO X4, X6 + PUNPCKLBW X3, X0 + PUNPCKLBW X7, X4 + PUNPCKHBW X3, X2 + PUNPCKHBW X7, X6 + MOVO X0, X1 + MOVO X4, X5 + MOVO X2, X3 + MOVO X6, X7 + SUBQ BX, SI + PUNPCKLBW X14, X0 + PUNPCKHBW X14, X1 + PUNPCKLBW X14, X4 + PUNPCKHBW X14, X5 + PUNPCKLBW X14, X2 + PUNPCKHBW X14, X3 + PUNPCKLBW X14, X6 + PUNPCKHBW X14, X7 + PMADDWL X10, X0 + PMADDWL X10, X1 + PMADDWL X11, X4 + 
PMADDWL X11, X5 + PMADDWL X10, X2 + PMADDWL X10, X3 + PMADDWL X11, X6 + PMADDWL X11, X7 + PADDL X4, X0 + PADDL X5, X1 + PADDL X6, X2 + PADDL X7, X3 + MOVOU (AX), X4 + MOVOU (AX)(BX*1), X7 + MOVO X4, X6 + PUNPCKLBW X7, X4 + PUNPCKHBW X7, X6 + MOVO X4, X5 + MOVO X6, X7 + PUNPCKLBW X14, X4 + PUNPCKHBW X14, X5 + PUNPCKLBW X14, X6 + PUNPCKHBW X14, X7 + PMADDWL 64(BP), X4 + PMADDWL 64(BP), X5 + PMADDWL 64(BP), X6 + PMADDWL 64(BP), X7 + LEAQ (AX)(BX*2), AX + PADDL X4, X0 + PADDL X5, X1 + PADDL X6, X2 + PADDL X7, X3 + PADDL X13, X0 + PADDL X13, X1 + PADDL X13, X2 + PADDL X13, X3 + PSRAL $14, X0 + PSRAL $14, X1 + PSRAL $14, X2 + PSRAL $14, X3 + PACKSSLW X1, X0 + PACKSSLW X3, X2 + PACKUSWB X2, X0 + MOVOU X0, (DI) + ADDQ $16, SI + ADDQ $16, DI + SUBQ $1, CX + JNE maxloop_13 +nomaxloop_12: + MOVQ R13, CX + SUBQ R13, SI + SUBQ R13, DI + ORQ CX, CX + JE nobackroll_14 + LEAQ (SI)(BX*4), AX + MOVOU (SI), X0 + MOVOU (SI)(BX*1), X3 + MOVOU (SI)(BX*2), X4 + MOVOU (BP), X10 + MOVOU 32(BP), X11 + ADDQ BX, SI + MOVOU (SI)(BX*2), X7 + MOVO X0, X2 + MOVO X4, X6 + PUNPCKLBW X3, X0 + PUNPCKLBW X7, X4 + PUNPCKHBW X3, X2 + PUNPCKHBW X7, X6 + MOVO X0, X1 + MOVO X4, X5 + MOVO X2, X3 + MOVO X6, X7 + SUBQ BX, SI + PUNPCKLBW X14, X0 + PUNPCKHBW X14, X1 + PUNPCKLBW X14, X4 + PUNPCKHBW X14, X5 + PUNPCKLBW X14, X2 + PUNPCKHBW X14, X3 + PUNPCKLBW X14, X6 + PUNPCKHBW X14, X7 + PMADDWL X10, X0 + PMADDWL X10, X1 + PMADDWL X11, X4 + PMADDWL X11, X5 + PMADDWL X10, X2 + PMADDWL X10, X3 + PMADDWL X11, X6 + PMADDWL X11, X7 + PADDL X4, X0 + PADDL X5, X1 + PADDL X6, X2 + PADDL X7, X3 + MOVOU (AX), X4 + MOVOU (AX)(BX*1), X7 + MOVO X4, X6 + PUNPCKLBW X7, X4 + PUNPCKHBW X7, X6 + MOVO X4, X5 + MOVO X6, X7 + PUNPCKLBW X14, X4 + PUNPCKHBW X14, X5 + PUNPCKLBW X14, X6 + PUNPCKHBW X14, X7 + PMADDWL 64(BP), X4 + PMADDWL 64(BP), X5 + PMADDWL 64(BP), X6 + PMADDWL 64(BP), X7 + LEAQ (AX)(BX*2), AX + PADDL X4, X0 + PADDL X5, X1 + PADDL X6, X2 + PADDL X7, X3 + PADDL X13, X0 + PADDL X13, X1 + PADDL X13, X2 + PADDL X13, X3 + 
PSRAL $14, X0 + PSRAL $14, X1 + PSRAL $14, X2 + PSRAL $14, X3 + PACKSSLW X1, X0 + PACKSSLW X3, X2 + PACKUSWB X2, X0 + MOVOU X0, (DI) + ADDQ $16, SI + ADDQ $16, DI +nobackroll_14: + ADDQ R11, DI + ADDQ $96, BP + ADDQ $2, R10 + SUBQ $1, height+112(FP) + JNE yloop_11 + RET + +TEXT ·v8scale8Amd64(SB),4,$0-136 + MOVQ dp+120(FP), BX + MOVQ width+104(FP), CX + MOVQ CX, DX + SUBQ CX, BX + ANDQ $15, DX + SHRQ $4, CX + MOVQ BX, R11 + MOVQ CX, R12 + MOVQ DX, AX + ORQ AX, AX + JE norollback_15 + SUBQ $16, DX + NEGQ DX +norollback_15: + MOVQ DX, R13 + MOVQ off+72(FP), CX + MOVQ CX, R10 + MOVO zero_0<>(SB), X14 + MOVO hbits_1<>(SB), X13 + MOVQ src+24(FP), SI + MOVQ SI, R9 + MOVQ dst+0(FP), DI + MOVQ cof+48(FP), BP + MOVQ sp+128(FP), BX +yloop_16: + MOVQ R9, SI + MOVQ R10, DX + MOVWQSX (DX), AX + MULQ BX + ADDQ AX, SI + MOVQ SI, R9 + MOVQ R12, CX + ORQ CX, CX + JE nomaxloop_17 +maxloop_18: + LEAQ (SI)(BX*4), AX + MOVOU (SI), X0 + MOVOU (SI)(BX*1), X3 + MOVOU (SI)(BX*2), X4 + MOVOU (BP), X10 + MOVOU 32(BP), X11 + ADDQ BX, SI + MOVOU (SI)(BX*2), X7 + MOVO X0, X2 + MOVO X4, X6 + PUNPCKLBW X3, X0 + PUNPCKLBW X7, X4 + PUNPCKHBW X3, X2 + PUNPCKHBW X7, X6 + MOVO X0, X1 + MOVO X4, X5 + MOVO X2, X3 + MOVO X6, X7 + SUBQ BX, SI + PUNPCKLBW X14, X0 + PUNPCKHBW X14, X1 + PUNPCKLBW X14, X4 + PUNPCKHBW X14, X5 + PUNPCKLBW X14, X2 + PUNPCKHBW X14, X3 + PUNPCKLBW X14, X6 + PUNPCKHBW X14, X7 + PMADDWL X10, X0 + PMADDWL X10, X1 + PMADDWL X11, X4 + PMADDWL X11, X5 + PMADDWL X10, X2 + PMADDWL X10, X3 + PMADDWL X11, X6 + PMADDWL X11, X7 + PADDL X4, X0 + PADDL X5, X1 + PADDL X6, X2 + PADDL X7, X3 + MOVOU (AX), X4 + MOVOU (AX)(BX*1), X7 + MOVO X4, X6 + PUNPCKLBW X7, X4 + PUNPCKHBW X7, X6 + MOVO X4, X5 + MOVO X6, X7 + PUNPCKLBW X14, X4 + PUNPCKHBW X14, X5 + PUNPCKLBW X14, X6 + PUNPCKHBW X14, X7 + PMADDWL 64(BP), X4 + PMADDWL 64(BP), X5 + PMADDWL 64(BP), X6 + PMADDWL 64(BP), X7 + LEAQ (AX)(BX*2), AX + PADDL X4, X0 + PADDL X5, X1 + PADDL X6, X2 + PADDL X7, X3 + MOVOU (AX), X4 + MOVOU (AX)(BX*1), X7 + MOVO 
X4, X6 + PUNPCKLBW X7, X4 + PUNPCKHBW X7, X6 + MOVO X4, X5 + MOVO X6, X7 + PUNPCKLBW X14, X4 + PUNPCKHBW X14, X5 + PUNPCKLBW X14, X6 + PUNPCKHBW X14, X7 + PMADDWL 96(BP), X4 + PMADDWL 96(BP), X5 + PMADDWL 96(BP), X6 + PMADDWL 96(BP), X7 + LEAQ (AX)(BX*2), AX + PADDL X4, X0 + PADDL X5, X1 + PADDL X6, X2 + PADDL X7, X3 + PADDL X13, X0 + PADDL X13, X1 + PADDL X13, X2 + PADDL X13, X3 + PSRAL $14, X0 + PSRAL $14, X1 + PSRAL $14, X2 + PSRAL $14, X3 + PACKSSLW X1, X0 + PACKSSLW X3, X2 + PACKUSWB X2, X0 + MOVOU X0, (DI) + ADDQ $16, SI + ADDQ $16, DI + SUBQ $1, CX + JNE maxloop_18 +nomaxloop_17: + MOVQ R13, CX + SUBQ R13, SI + SUBQ R13, DI + ORQ CX, CX + JE nobackroll_19 + LEAQ (SI)(BX*4), AX + MOVOU (SI), X0 + MOVOU (SI)(BX*1), X3 + MOVOU (SI)(BX*2), X4 + MOVOU (BP), X10 + MOVOU 32(BP), X11 + ADDQ BX, SI + MOVOU (SI)(BX*2), X7 + MOVO X0, X2 + MOVO X4, X6 + PUNPCKLBW X3, X0 + PUNPCKLBW X7, X4 + PUNPCKHBW X3, X2 + PUNPCKHBW X7, X6 + MOVO X0, X1 + MOVO X4, X5 + MOVO X2, X3 + MOVO X6, X7 + SUBQ BX, SI + PUNPCKLBW X14, X0 + PUNPCKHBW X14, X1 + PUNPCKLBW X14, X4 + PUNPCKHBW X14, X5 + PUNPCKLBW X14, X2 + PUNPCKHBW X14, X3 + PUNPCKLBW X14, X6 + PUNPCKHBW X14, X7 + PMADDWL X10, X0 + PMADDWL X10, X1 + PMADDWL X11, X4 + PMADDWL X11, X5 + PMADDWL X10, X2 + PMADDWL X10, X3 + PMADDWL X11, X6 + PMADDWL X11, X7 + PADDL X4, X0 + PADDL X5, X1 + PADDL X6, X2 + PADDL X7, X3 + MOVOU (AX), X4 + MOVOU (AX)(BX*1), X7 + MOVO X4, X6 + PUNPCKLBW X7, X4 + PUNPCKHBW X7, X6 + MOVO X4, X5 + MOVO X6, X7 + PUNPCKLBW X14, X4 + PUNPCKHBW X14, X5 + PUNPCKLBW X14, X6 + PUNPCKHBW X14, X7 + PMADDWL 64(BP), X4 + PMADDWL 64(BP), X5 + PMADDWL 64(BP), X6 + PMADDWL 64(BP), X7 + LEAQ (AX)(BX*2), AX + PADDL X4, X0 + PADDL X5, X1 + PADDL X6, X2 + PADDL X7, X3 + MOVOU (AX), X4 + MOVOU (AX)(BX*1), X7 + MOVO X4, X6 + PUNPCKLBW X7, X4 + PUNPCKHBW X7, X6 + MOVO X4, X5 + MOVO X6, X7 + PUNPCKLBW X14, X4 + PUNPCKHBW X14, X5 + PUNPCKLBW X14, X6 + PUNPCKHBW X14, X7 + PMADDWL 96(BP), X4 + PMADDWL 96(BP), X5 + PMADDWL 96(BP), X6 + 
PMADDWL 96(BP), X7 + LEAQ (AX)(BX*2), AX + PADDL X4, X0 + PADDL X5, X1 + PADDL X6, X2 + PADDL X7, X3 + PADDL X13, X0 + PADDL X13, X1 + PADDL X13, X2 + PADDL X13, X3 + PSRAL $14, X0 + PSRAL $14, X1 + PSRAL $14, X2 + PSRAL $14, X3 + PACKSSLW X1, X0 + PACKSSLW X3, X2 + PACKUSWB X2, X0 + MOVOU X0, (DI) + ADDQ $16, SI + ADDQ $16, DI +nobackroll_19: + ADDQ R11, DI + ADDQ $128, BP + ADDQ $2, R10 + SUBQ $1, height+112(FP) + JNE yloop_16 + RET + +TEXT ·v8scale10Amd64(SB),4,$0-136 + MOVQ dp+120(FP), BX + MOVQ width+104(FP), CX + MOVQ CX, DX + SUBQ CX, BX + ANDQ $15, DX + SHRQ $4, CX + MOVQ BX, R11 + MOVQ CX, R12 + MOVQ DX, AX + ORQ AX, AX + JE norollback_20 + SUBQ $16, DX + NEGQ DX +norollback_20: + MOVQ DX, R13 + MOVQ off+72(FP), CX + MOVQ CX, R10 + MOVO zero_0<>(SB), X14 + MOVO hbits_1<>(SB), X13 + MOVQ src+24(FP), SI + MOVQ SI, R9 + MOVQ dst+0(FP), DI + MOVQ cof+48(FP), BP + MOVQ sp+128(FP), BX +yloop_21: + MOVQ R9, SI + MOVQ R10, DX + MOVWQSX (DX), AX + MULQ BX + ADDQ AX, SI + MOVQ SI, R9 + MOVQ R12, CX + ORQ CX, CX + JE nomaxloop_22 +maxloop_23: + LEAQ (SI)(BX*4), AX + MOVOU (SI), X0 + MOVOU (SI)(BX*1), X3 + MOVOU (SI)(BX*2), X4 + MOVOU (BP), X10 + MOVOU 32(BP), X11 + ADDQ BX, SI + MOVOU (SI)(BX*2), X7 + MOVO X0, X2 + MOVO X4, X6 + PUNPCKLBW X3, X0 + PUNPCKLBW X7, X4 + PUNPCKHBW X3, X2 + PUNPCKHBW X7, X6 + MOVO X0, X1 + MOVO X4, X5 + MOVO X2, X3 + MOVO X6, X7 + SUBQ BX, SI + PUNPCKLBW X14, X0 + PUNPCKHBW X14, X1 + PUNPCKLBW X14, X4 + PUNPCKHBW X14, X5 + PUNPCKLBW X14, X2 + PUNPCKHBW X14, X3 + PUNPCKLBW X14, X6 + PUNPCKHBW X14, X7 + PMADDWL X10, X0 + PMADDWL X10, X1 + PMADDWL X11, X4 + PMADDWL X11, X5 + PMADDWL X10, X2 + PMADDWL X10, X3 + PMADDWL X11, X6 + PMADDWL X11, X7 + PADDL X4, X0 + PADDL X5, X1 + PADDL X6, X2 + PADDL X7, X3 + MOVOU (AX), X4 + MOVOU (AX)(BX*1), X7 + MOVO X4, X6 + PUNPCKLBW X7, X4 + PUNPCKHBW X7, X6 + MOVO X4, X5 + MOVO X6, X7 + PUNPCKLBW X14, X4 + PUNPCKHBW X14, X5 + PUNPCKLBW X14, X6 + PUNPCKHBW X14, X7 + PMADDWL 64(BP), X4 + PMADDWL 64(BP), X5 + 
PMADDWL 64(BP), X6 + PMADDWL 64(BP), X7 + LEAQ (AX)(BX*2), AX + PADDL X4, X0 + PADDL X5, X1 + PADDL X6, X2 + PADDL X7, X3 + MOVOU (AX), X4 + MOVOU (AX)(BX*1), X7 + MOVO X4, X6 + PUNPCKLBW X7, X4 + PUNPCKHBW X7, X6 + MOVO X4, X5 + MOVO X6, X7 + PUNPCKLBW X14, X4 + PUNPCKHBW X14, X5 + PUNPCKLBW X14, X6 + PUNPCKHBW X14, X7 + PMADDWL 96(BP), X4 + PMADDWL 96(BP), X5 + PMADDWL 96(BP), X6 + PMADDWL 96(BP), X7 + LEAQ (AX)(BX*2), AX + PADDL X4, X0 + PADDL X5, X1 + PADDL X6, X2 + PADDL X7, X3 + MOVOU (AX), X4 + MOVOU (AX)(BX*1), X7 + MOVO X4, X6 + PUNPCKLBW X7, X4 + PUNPCKHBW X7, X6 + MOVO X4, X5 + MOVO X6, X7 + PUNPCKLBW X14, X4 + PUNPCKHBW X14, X5 + PUNPCKLBW X14, X6 + PUNPCKHBW X14, X7 + PMADDWL 128(BP), X4 + PMADDWL 128(BP), X5 + PMADDWL 128(BP), X6 + PMADDWL 128(BP), X7 + LEAQ (AX)(BX*2), AX + PADDL X4, X0 + PADDL X5, X1 + PADDL X6, X2 + PADDL X7, X3 + PADDL X13, X0 + PADDL X13, X1 + PADDL X13, X2 + PADDL X13, X3 + PSRAL $14, X0 + PSRAL $14, X1 + PSRAL $14, X2 + PSRAL $14, X3 + PACKSSLW X1, X0 + PACKSSLW X3, X2 + PACKUSWB X2, X0 + MOVOU X0, (DI) + ADDQ $16, SI + ADDQ $16, DI + SUBQ $1, CX + JNE maxloop_23 +nomaxloop_22: + MOVQ R13, CX + SUBQ R13, SI + SUBQ R13, DI + ORQ CX, CX + JE nobackroll_24 + LEAQ (SI)(BX*4), AX + MOVOU (SI), X0 + MOVOU (SI)(BX*1), X3 + MOVOU (SI)(BX*2), X4 + MOVOU (BP), X10 + MOVOU 32(BP), X11 + ADDQ BX, SI + MOVOU (SI)(BX*2), X7 + MOVO X0, X2 + MOVO X4, X6 + PUNPCKLBW X3, X0 + PUNPCKLBW X7, X4 + PUNPCKHBW X3, X2 + PUNPCKHBW X7, X6 + MOVO X0, X1 + MOVO X4, X5 + MOVO X2, X3 + MOVO X6, X7 + SUBQ BX, SI + PUNPCKLBW X14, X0 + PUNPCKHBW X14, X1 + PUNPCKLBW X14, X4 + PUNPCKHBW X14, X5 + PUNPCKLBW X14, X2 + PUNPCKHBW X14, X3 + PUNPCKLBW X14, X6 + PUNPCKHBW X14, X7 + PMADDWL X10, X0 + PMADDWL X10, X1 + PMADDWL X11, X4 + PMADDWL X11, X5 + PMADDWL X10, X2 + PMADDWL X10, X3 + PMADDWL X11, X6 + PMADDWL X11, X7 + PADDL X4, X0 + PADDL X5, X1 + PADDL X6, X2 + PADDL X7, X3 + MOVOU (AX), X4 + MOVOU (AX)(BX*1), X7 + MOVO X4, X6 + PUNPCKLBW X7, X4 + PUNPCKHBW X7, X6 
+ MOVO X4, X5 + MOVO X6, X7 + PUNPCKLBW X14, X4 + PUNPCKHBW X14, X5 + PUNPCKLBW X14, X6 + PUNPCKHBW X14, X7 + PMADDWL 64(BP), X4 + PMADDWL 64(BP), X5 + PMADDWL 64(BP), X6 + PMADDWL 64(BP), X7 + LEAQ (AX)(BX*2), AX + PADDL X4, X0 + PADDL X5, X1 + PADDL X6, X2 + PADDL X7, X3 + MOVOU (AX), X4 + MOVOU (AX)(BX*1), X7 + MOVO X4, X6 + PUNPCKLBW X7, X4 + PUNPCKHBW X7, X6 + MOVO X4, X5 + MOVO X6, X7 + PUNPCKLBW X14, X4 + PUNPCKHBW X14, X5 + PUNPCKLBW X14, X6 + PUNPCKHBW X14, X7 + PMADDWL 96(BP), X4 + PMADDWL 96(BP), X5 + PMADDWL 96(BP), X6 + PMADDWL 96(BP), X7 + LEAQ (AX)(BX*2), AX + PADDL X4, X0 + PADDL X5, X1 + PADDL X6, X2 + PADDL X7, X3 + MOVOU (AX), X4 + MOVOU (AX)(BX*1), X7 + MOVO X4, X6 + PUNPCKLBW X7, X4 + PUNPCKHBW X7, X6 + MOVO X4, X5 + MOVO X6, X7 + PUNPCKLBW X14, X4 + PUNPCKHBW X14, X5 + PUNPCKLBW X14, X6 + PUNPCKHBW X14, X7 + PMADDWL 128(BP), X4 + PMADDWL 128(BP), X5 + PMADDWL 128(BP), X6 + PMADDWL 128(BP), X7 + LEAQ (AX)(BX*2), AX + PADDL X4, X0 + PADDL X5, X1 + PADDL X6, X2 + PADDL X7, X3 + PADDL X13, X0 + PADDL X13, X1 + PADDL X13, X2 + PADDL X13, X3 + PSRAL $14, X0 + PSRAL $14, X1 + PSRAL $14, X2 + PSRAL $14, X3 + PACKSSLW X1, X0 + PACKSSLW X3, X2 + PACKUSWB X2, X0 + MOVOU X0, (DI) + ADDQ $16, SI + ADDQ $16, DI +nobackroll_24: + ADDQ R11, DI + ADDQ $160, BP + ADDQ $2, R10 + SUBQ $1, height+112(FP) + JNE yloop_21 + RET + +TEXT ·v8scale12Amd64(SB),4,$0-136 + MOVQ dp+120(FP), BX + MOVQ width+104(FP), CX + MOVQ CX, DX + SUBQ CX, BX + ANDQ $15, DX + SHRQ $4, CX + MOVQ BX, R11 + MOVQ CX, R12 + MOVQ DX, AX + ORQ AX, AX + JE norollback_25 + SUBQ $16, DX + NEGQ DX +norollback_25: + MOVQ DX, R13 + MOVQ off+72(FP), CX + MOVQ CX, R10 + MOVO zero_0<>(SB), X14 + MOVO hbits_1<>(SB), X13 + MOVQ src+24(FP), SI + MOVQ SI, R9 + MOVQ dst+0(FP), DI + MOVQ cof+48(FP), BP + MOVQ sp+128(FP), BX +yloop_26: + MOVQ R9, SI + MOVQ R10, DX + MOVWQSX (DX), AX + MULQ BX + ADDQ AX, SI + MOVQ SI, R9 + MOVQ R12, CX + ORQ CX, CX + JE nomaxloop_27 +maxloop_28: + LEAQ (SI)(BX*4), AX + MOVOU 
(SI), X0 + MOVOU (SI)(BX*1), X3 + MOVOU (SI)(BX*2), X4 + MOVOU (BP), X10 + MOVOU 32(BP), X11 + ADDQ BX, SI + MOVOU (SI)(BX*2), X7 + MOVO X0, X2 + MOVO X4, X6 + PUNPCKLBW X3, X0 + PUNPCKLBW X7, X4 + PUNPCKHBW X3, X2 + PUNPCKHBW X7, X6 + MOVO X0, X1 + MOVO X4, X5 + MOVO X2, X3 + MOVO X6, X7 + SUBQ BX, SI + PUNPCKLBW X14, X0 + PUNPCKHBW X14, X1 + PUNPCKLBW X14, X4 + PUNPCKHBW X14, X5 + PUNPCKLBW X14, X2 + PUNPCKHBW X14, X3 + PUNPCKLBW X14, X6 + PUNPCKHBW X14, X7 + PMADDWL X10, X0 + PMADDWL X10, X1 + PMADDWL X11, X4 + PMADDWL X11, X5 + PMADDWL X10, X2 + PMADDWL X10, X3 + PMADDWL X11, X6 + PMADDWL X11, X7 + PADDL X4, X0 + PADDL X5, X1 + PADDL X6, X2 + PADDL X7, X3 + MOVOU (AX), X4 + MOVOU (AX)(BX*1), X7 + MOVO X4, X6 + PUNPCKLBW X7, X4 + PUNPCKHBW X7, X6 + MOVO X4, X5 + MOVO X6, X7 + PUNPCKLBW X14, X4 + PUNPCKHBW X14, X5 + PUNPCKLBW X14, X6 + PUNPCKHBW X14, X7 + PMADDWL 64(BP), X4 + PMADDWL 64(BP), X5 + PMADDWL 64(BP), X6 + PMADDWL 64(BP), X7 + LEAQ (AX)(BX*2), AX + PADDL X4, X0 + PADDL X5, X1 + PADDL X6, X2 + PADDL X7, X3 + MOVOU (AX), X4 + MOVOU (AX)(BX*1), X7 + MOVO X4, X6 + PUNPCKLBW X7, X4 + PUNPCKHBW X7, X6 + MOVO X4, X5 + MOVO X6, X7 + PUNPCKLBW X14, X4 + PUNPCKHBW X14, X5 + PUNPCKLBW X14, X6 + PUNPCKHBW X14, X7 + PMADDWL 96(BP), X4 + PMADDWL 96(BP), X5 + PMADDWL 96(BP), X6 + PMADDWL 96(BP), X7 + LEAQ (AX)(BX*2), AX + PADDL X4, X0 + PADDL X5, X1 + PADDL X6, X2 + PADDL X7, X3 + MOVOU (AX), X4 + MOVOU (AX)(BX*1), X7 + MOVO X4, X6 + PUNPCKLBW X7, X4 + PUNPCKHBW X7, X6 + MOVO X4, X5 + MOVO X6, X7 + PUNPCKLBW X14, X4 + PUNPCKHBW X14, X5 + PUNPCKLBW X14, X6 + PUNPCKHBW X14, X7 + PMADDWL 128(BP), X4 + PMADDWL 128(BP), X5 + PMADDWL 128(BP), X6 + PMADDWL 128(BP), X7 + LEAQ (AX)(BX*2), AX + PADDL X4, X0 + PADDL X5, X1 + PADDL X6, X2 + PADDL X7, X3 + MOVOU (AX), X4 + MOVOU (AX)(BX*1), X7 + MOVO X4, X6 + PUNPCKLBW X7, X4 + PUNPCKHBW X7, X6 + MOVO X4, X5 + MOVO X6, X7 + PUNPCKLBW X14, X4 + PUNPCKHBW X14, X5 + PUNPCKLBW X14, X6 + PUNPCKHBW X14, X7 + PMADDWL 160(BP), X4 + 
PMADDWL 160(BP), X5 + PMADDWL 160(BP), X6 + PMADDWL 160(BP), X7 + LEAQ (AX)(BX*2), AX + PADDL X4, X0 + PADDL X5, X1 + PADDL X6, X2 + PADDL X7, X3 + PADDL X13, X0 + PADDL X13, X1 + PADDL X13, X2 + PADDL X13, X3 + PSRAL $14, X0 + PSRAL $14, X1 + PSRAL $14, X2 + PSRAL $14, X3 + PACKSSLW X1, X0 + PACKSSLW X3, X2 + PACKUSWB X2, X0 + MOVOU X0, (DI) + ADDQ $16, SI + ADDQ $16, DI + SUBQ $1, CX + JNE maxloop_28 +nomaxloop_27: + MOVQ R13, CX + SUBQ R13, SI + SUBQ R13, DI + ORQ CX, CX + JE nobackroll_29 + LEAQ (SI)(BX*4), AX + MOVOU (SI), X0 + MOVOU (SI)(BX*1), X3 + MOVOU (SI)(BX*2), X4 + MOVOU (BP), X10 + MOVOU 32(BP), X11 + ADDQ BX, SI + MOVOU (SI)(BX*2), X7 + MOVO X0, X2 + MOVO X4, X6 + PUNPCKLBW X3, X0 + PUNPCKLBW X7, X4 + PUNPCKHBW X3, X2 + PUNPCKHBW X7, X6 + MOVO X0, X1 + MOVO X4, X5 + MOVO X2, X3 + MOVO X6, X7 + SUBQ BX, SI + PUNPCKLBW X14, X0 + PUNPCKHBW X14, X1 + PUNPCKLBW X14, X4 + PUNPCKHBW X14, X5 + PUNPCKLBW X14, X2 + PUNPCKHBW X14, X3 + PUNPCKLBW X14, X6 + PUNPCKHBW X14, X7 + PMADDWL X10, X0 + PMADDWL X10, X1 + PMADDWL X11, X4 + PMADDWL X11, X5 + PMADDWL X10, X2 + PMADDWL X10, X3 + PMADDWL X11, X6 + PMADDWL X11, X7 + PADDL X4, X0 + PADDL X5, X1 + PADDL X6, X2 + PADDL X7, X3 + MOVOU (AX), X4 + MOVOU (AX)(BX*1), X7 + MOVO X4, X6 + PUNPCKLBW X7, X4 + PUNPCKHBW X7, X6 + MOVO X4, X5 + MOVO X6, X7 + PUNPCKLBW X14, X4 + PUNPCKHBW X14, X5 + PUNPCKLBW X14, X6 + PUNPCKHBW X14, X7 + PMADDWL 64(BP), X4 + PMADDWL 64(BP), X5 + PMADDWL 64(BP), X6 + PMADDWL 64(BP), X7 + LEAQ (AX)(BX*2), AX + PADDL X4, X0 + PADDL X5, X1 + PADDL X6, X2 + PADDL X7, X3 + MOVOU (AX), X4 + MOVOU (AX)(BX*1), X7 + MOVO X4, X6 + PUNPCKLBW X7, X4 + PUNPCKHBW X7, X6 + MOVO X4, X5 + MOVO X6, X7 + PUNPCKLBW X14, X4 + PUNPCKHBW X14, X5 + PUNPCKLBW X14, X6 + PUNPCKHBW X14, X7 + PMADDWL 96(BP), X4 + PMADDWL 96(BP), X5 + PMADDWL 96(BP), X6 + PMADDWL 96(BP), X7 + LEAQ (AX)(BX*2), AX + PADDL X4, X0 + PADDL X5, X1 + PADDL X6, X2 + PADDL X7, X3 + MOVOU (AX), X4 + MOVOU (AX)(BX*1), X7 + MOVO X4, X6 + PUNPCKLBW X7, 
X4 + PUNPCKHBW X7, X6 + MOVO X4, X5 + MOVO X6, X7 + PUNPCKLBW X14, X4 + PUNPCKHBW X14, X5 + PUNPCKLBW X14, X6 + PUNPCKHBW X14, X7 + PMADDWL 128(BP), X4 + PMADDWL 128(BP), X5 + PMADDWL 128(BP), X6 + PMADDWL 128(BP), X7 + LEAQ (AX)(BX*2), AX + PADDL X4, X0 + PADDL X5, X1 + PADDL X6, X2 + PADDL X7, X3 + MOVOU (AX), X4 + MOVOU (AX)(BX*1), X7 + MOVO X4, X6 + PUNPCKLBW X7, X4 + PUNPCKHBW X7, X6 + MOVO X4, X5 + MOVO X6, X7 + PUNPCKLBW X14, X4 + PUNPCKHBW X14, X5 + PUNPCKLBW X14, X6 + PUNPCKHBW X14, X7 + PMADDWL 160(BP), X4 + PMADDWL 160(BP), X5 + PMADDWL 160(BP), X6 + PMADDWL 160(BP), X7 + LEAQ (AX)(BX*2), AX + PADDL X4, X0 + PADDL X5, X1 + PADDL X6, X2 + PADDL X7, X3 + PADDL X13, X0 + PADDL X13, X1 + PADDL X13, X2 + PADDL X13, X3 + PSRAL $14, X0 + PSRAL $14, X1 + PSRAL $14, X2 + PSRAL $14, X3 + PACKSSLW X1, X0 + PACKSSLW X3, X2 + PACKUSWB X2, X0 + MOVOU X0, (DI) + ADDQ $16, SI + ADDQ $16, DI +nobackroll_29: + ADDQ R11, DI + ADDQ $192, BP + ADDQ $2, R10 + SUBQ $1, height+112(FP) + JNE yloop_26 + RET + +TEXT ·v8scaleNAmd64(SB),4,$0-136 + MOVQ dp+120(FP), BX + MOVQ width+104(FP), CX + MOVQ CX, DX + SUBQ CX, BX + ANDQ $15, DX + SHRQ $4, CX + MOVQ BX, R11 + MOVQ CX, R12 + MOVQ DX, AX + ORQ AX, AX + JE norollback_30 + SUBQ $16, DX + NEGQ DX +norollback_30: + MOVQ DX, R13 + MOVQ off+72(FP), CX + MOVQ CX, R10 + MOVO zero_0<>(SB), X14 + MOVO hbits_1<>(SB), X13 + MOVQ taps+96(FP), DX + SUBQ $4, DX + SHRQ $1, DX + MOVQ DX, R14 + MOVQ src+24(FP), SI + MOVQ SI, R9 + MOVQ dst+0(FP), DI + MOVQ cof+48(FP), BP + MOVQ sp+128(FP), BX +yloop_31: + MOVQ R9, SI + MOVQ R10, DX + MOVWQSX (DX), AX + MULQ BX + ADDQ AX, SI + MOVQ SI, R9 + MOVQ R12, CX + ORQ CX, CX + JE nomaxloop_32 +maxloop_33: + LEAQ (SI)(BX*4), AX + MOVOU (SI), X0 + MOVOU (SI)(BX*1), X3 + MOVOU (SI)(BX*2), X4 + MOVOU (BP), X10 + MOVOU 32(BP), X11 + ADDQ BX, SI + MOVOU (SI)(BX*2), X7 + MOVO X0, X2 + MOVO X4, X6 + PUNPCKLBW X3, X0 + PUNPCKLBW X7, X4 + PUNPCKHBW X3, X2 + PUNPCKHBW X7, X6 + MOVO X0, X1 + MOVO X4, X5 + MOVO X2, X3 + 
MOVO X6, X7 + SUBQ BX, SI + PUNPCKLBW X14, X0 + PUNPCKHBW X14, X1 + PUNPCKLBW X14, X4 + PUNPCKHBW X14, X5 + PUNPCKLBW X14, X2 + PUNPCKHBW X14, X3 + PUNPCKLBW X14, X6 + PUNPCKHBW X14, X7 + PMADDWL X10, X0 + PMADDWL X10, X1 + PMADDWL X11, X4 + PMADDWL X11, X5 + PMADDWL X10, X2 + PMADDWL X10, X3 + PMADDWL X11, X6 + PMADDWL X11, X7 + PADDL X4, X0 + PADDL X5, X1 + PADDL X6, X2 + PADDL X7, X3 + MOVQ R14, R15 + MOVQ BP, DX + ADDQ $32, DX +innerloop_34: + ADDQ $32, DX + MOVOU (AX), X4 + MOVOU (AX)(BX*1), X7 + MOVO X4, X6 + PUNPCKLBW X7, X4 + PUNPCKHBW X7, X6 + MOVO X4, X5 + MOVO X6, X7 + PUNPCKLBW X14, X4 + PUNPCKHBW X14, X5 + PUNPCKLBW X14, X6 + PUNPCKHBW X14, X7 + PMADDWL (DX), X4 + PMADDWL (DX), X5 + PMADDWL (DX), X6 + PMADDWL (DX), X7 + LEAQ (AX)(BX*2), AX + PADDL X4, X0 + PADDL X5, X1 + PADDL X6, X2 + PADDL X7, X3 + SUBQ $1, R15 + JNE innerloop_34 + PADDL X13, X0 + PADDL X13, X1 + PADDL X13, X2 + PADDL X13, X3 + PSRAL $14, X0 + PSRAL $14, X1 + PSRAL $14, X2 + PSRAL $14, X3 + PACKSSLW X1, X0 + PACKSSLW X3, X2 + PACKUSWB X2, X0 + MOVOU X0, (DI) + ADDQ $16, SI + ADDQ $16, DI + SUBQ $1, CX + JNE maxloop_33 +nomaxloop_32: + MOVQ R13, CX + SUBQ R13, SI + SUBQ R13, DI + ORQ CX, CX + JE nobackroll_35 + LEAQ (SI)(BX*4), AX + MOVOU (SI), X0 + MOVOU (SI)(BX*1), X3 + MOVOU (SI)(BX*2), X4 + MOVOU (BP), X10 + MOVOU 32(BP), X11 + ADDQ BX, SI + MOVOU (SI)(BX*2), X7 + MOVO X0, X2 + MOVO X4, X6 + PUNPCKLBW X3, X0 + PUNPCKLBW X7, X4 + PUNPCKHBW X3, X2 + PUNPCKHBW X7, X6 + MOVO X0, X1 + MOVO X4, X5 + MOVO X2, X3 + MOVO X6, X7 + SUBQ BX, SI + PUNPCKLBW X14, X0 + PUNPCKHBW X14, X1 + PUNPCKLBW X14, X4 + PUNPCKHBW X14, X5 + PUNPCKLBW X14, X2 + PUNPCKHBW X14, X3 + PUNPCKLBW X14, X6 + PUNPCKHBW X14, X7 + PMADDWL X10, X0 + PMADDWL X10, X1 + PMADDWL X11, X4 + PMADDWL X11, X5 + PMADDWL X10, X2 + PMADDWL X10, X3 + PMADDWL X11, X6 + PMADDWL X11, X7 + PADDL X4, X0 + PADDL X5, X1 + PADDL X6, X2 + PADDL X7, X3 + MOVQ R14, R15 + MOVQ BP, DX + ADDQ $32, DX +innerloop_36: + ADDQ $32, DX + MOVOU (AX), X4 
+ MOVOU (AX)(BX*1), X7 + MOVO X4, X6 + PUNPCKLBW X7, X4 + PUNPCKHBW X7, X6 + MOVO X4, X5 + MOVO X6, X7 + PUNPCKLBW X14, X4 + PUNPCKHBW X14, X5 + PUNPCKLBW X14, X6 + PUNPCKHBW X14, X7 + PMADDWL (DX), X4 + PMADDWL (DX), X5 + PMADDWL (DX), X6 + PMADDWL (DX), X7 + LEAQ (AX)(BX*2), AX + PADDL X4, X0 + PADDL X5, X1 + PADDL X6, X2 + PADDL X7, X3 + SUBQ $1, R15 + JNE innerloop_36 + PADDL X13, X0 + PADDL X13, X1 + PADDL X13, X2 + PADDL X13, X3 + PSRAL $14, X0 + PSRAL $14, X1 + PSRAL $14, X2 + PSRAL $14, X3 + PACKSSLW X1, X0 + PACKSSLW X3, X2 + PACKUSWB X2, X0 + MOVOU X0, (DI) + ADDQ $16, SI + ADDQ $16, DI +nobackroll_35: + ADDQ R11, DI + MOVQ taps+96(FP), DX + SHLQ $4, DX + ADDQ DX, BP + ADDQ $2, R10 + SUBQ $1, height+112(FP) + JNE yloop_31 + RET diff --git a/vendor/github.com/nfnt/resize/LICENSE b/vendor/github.com/nfnt/resize/LICENSE deleted file mode 100644 index 7836cad5..00000000 --- a/vendor/github.com/nfnt/resize/LICENSE +++ /dev/null @@ -1,13 +0,0 @@ -Copyright (c) 2012, Jan Schlicht - -Permission to use, copy, modify, and/or distribute this software for any purpose -with or without fee is hereby granted, provided that the above copyright notice -and this permission notice appear in all copies. - -THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH -REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND -FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, -INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS -OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER -TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF -THIS SOFTWARE. 
diff --git a/vendor/github.com/nfnt/resize/README.md b/vendor/github.com/nfnt/resize/README.md deleted file mode 100644 index 2aefa75c..00000000 --- a/vendor/github.com/nfnt/resize/README.md +++ /dev/null @@ -1,149 +0,0 @@ -Resize -====== - -Image resizing for the [Go programming language](http://golang.org) with common interpolation methods. - -[![Build Status](https://travis-ci.org/nfnt/resize.svg)](https://travis-ci.org/nfnt/resize) - -Installation ------------- - -```bash -$ go get github.com/nfnt/resize -``` - -It's that easy! - -Usage ------ - -This package needs at least Go 1.1. Import package with - -```go -import "github.com/nfnt/resize" -``` - -The resize package provides 2 functions: - -* `resize.Resize` creates a scaled image with new dimensions (`width`, `height`) using the interpolation function `interp`. - If either `width` or `height` is set to 0, it will be set to an aspect ratio preserving value. -* `resize.Thumbnail` downscales an image preserving its aspect ratio to the maximum dimensions (`maxWidth`, `maxHeight`). - It will return the original image if original sizes are smaller than the provided dimensions. 
- -```go -resize.Resize(width, height uint, img image.Image, interp resize.InterpolationFunction) image.Image -resize.Thumbnail(maxWidth, maxHeight uint, img image.Image, interp resize.InterpolationFunction) image.Image -``` - -The provided interpolation functions are (from fast to slow execution time) - -- `NearestNeighbor`: [Nearest-neighbor interpolation](http://en.wikipedia.org/wiki/Nearest-neighbor_interpolation) -- `Bilinear`: [Bilinear interpolation](http://en.wikipedia.org/wiki/Bilinear_interpolation) -- `Bicubic`: [Bicubic interpolation](http://en.wikipedia.org/wiki/Bicubic_interpolation) -- `MitchellNetravali`: [Mitchell-Netravali interpolation](http://dl.acm.org/citation.cfm?id=378514) -- `Lanczos2`: [Lanczos resampling](http://en.wikipedia.org/wiki/Lanczos_resampling) with a=2 -- `Lanczos3`: [Lanczos resampling](http://en.wikipedia.org/wiki/Lanczos_resampling) with a=3 - -Which of these methods gives the best results depends on your use case. - -Sample usage: - -```go -package main - -import ( - "github.com/nfnt/resize" - "image/jpeg" - "log" - "os" -) - -func main() { - // open "test.jpg" - file, err := os.Open("test.jpg") - if err != nil { - log.Fatal(err) - } - - // decode jpeg into image.Image - img, err := jpeg.Decode(file) - if err != nil { - log.Fatal(err) - } - file.Close() - - // resize to width 1000 using Lanczos resampling - // and preserve aspect ratio - m := resize.Resize(1000, 0, img, resize.Lanczos3) - - out, err := os.Create("test_resized.jpg") - if err != nil { - log.Fatal(err) - } - defer out.Close() - - // write new image to file - jpeg.Encode(out, m, nil) -} -``` - -Caveats -------- - -* Optimized access routines are used for `image.RGBA`, `image.NRGBA`, `image.RGBA64`, `image.NRGBA64`, `image.YCbCr`, `image.Gray`, and `image.Gray16` types. All other image types are accessed in a generic way that will result in slow processing speed. -* JPEG images are stored in `image.YCbCr`. 
This image format stores data in a way that will decrease processing speed. A resize may be up to 2 times slower than with `image.RGBA`. - - -Downsizing Samples -------- - -Downsizing is not as simple as it might look like. Images have to be filtered before they are scaled down, otherwise aliasing might occur. -Filtering is highly subjective: Applying too much will blur the whole image, too little will make aliasing become apparent. -Resize tries to provide sane defaults that should suffice in most cases. - -### Artificial sample - -Original image -![Rings](http://nfnt.github.com/img/rings_lg_orig.png) - - - - - - - - - - - - - - -

Nearest-Neighbor

Bilinear

Bicubic

Mitchell-Netravali

Lanczos2

Lanczos3
- -### Real-Life sample - -Original image -![Original](http://nfnt.github.com/img/IMG_3694_720.jpg) - - - - - - - - - - - - - - -

Nearest-Neighbor

Bilinear

Bicubic

Mitchell-Netravali

Lanczos2

Lanczos3
- - -License -------- - -Copyright (c) 2012 Jan Schlicht -Resize is released under a MIT style license. diff --git a/vendor/github.com/nfnt/resize/converter.go b/vendor/github.com/nfnt/resize/converter.go deleted file mode 100644 index f9c520d0..00000000 --- a/vendor/github.com/nfnt/resize/converter.go +++ /dev/null @@ -1,438 +0,0 @@ -/* -Copyright (c) 2012, Jan Schlicht - -Permission to use, copy, modify, and/or distribute this software for any purpose -with or without fee is hereby granted, provided that the above copyright notice -and this permission notice appear in all copies. - -THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH -REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND -FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, -INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS -OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER -TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF -THIS SOFTWARE. -*/ - -package resize - -import "image" - -// Keep value in [0,255] range. -func clampUint8(in int32) uint8 { - // casting a negative int to an uint will result in an overflown - // large uint. this behavior will be exploited here and in other functions - // to achieve a higher performance. - if uint32(in) < 256 { - return uint8(in) - } - if in > 255 { - return 255 - } - return 0 -} - -// Keep value in [0,65535] range. 
-func clampUint16(in int64) uint16 { - if uint64(in) < 65536 { - return uint16(in) - } - if in > 65535 { - return 65535 - } - return 0 -} - -func resizeGeneric(in image.Image, out *image.RGBA64, scale float64, coeffs []int32, offset []int, filterLength int) { - newBounds := out.Bounds() - maxX := in.Bounds().Dx() - 1 - - for x := newBounds.Min.X; x < newBounds.Max.X; x++ { - for y := newBounds.Min.Y; y < newBounds.Max.Y; y++ { - var rgba [4]int64 - var sum int64 - start := offset[y] - ci := y * filterLength - for i := 0; i < filterLength; i++ { - coeff := coeffs[ci+i] - if coeff != 0 { - xi := start + i - switch { - case xi < 0: - xi = 0 - case xi >= maxX: - xi = maxX - } - - r, g, b, a := in.At(xi+in.Bounds().Min.X, x+in.Bounds().Min.Y).RGBA() - - rgba[0] += int64(coeff) * int64(r) - rgba[1] += int64(coeff) * int64(g) - rgba[2] += int64(coeff) * int64(b) - rgba[3] += int64(coeff) * int64(a) - sum += int64(coeff) - } - } - - offset := (y-newBounds.Min.Y)*out.Stride + (x-newBounds.Min.X)*8 - - value := clampUint16(rgba[0] / sum) - out.Pix[offset+0] = uint8(value >> 8) - out.Pix[offset+1] = uint8(value) - value = clampUint16(rgba[1] / sum) - out.Pix[offset+2] = uint8(value >> 8) - out.Pix[offset+3] = uint8(value) - value = clampUint16(rgba[2] / sum) - out.Pix[offset+4] = uint8(value >> 8) - out.Pix[offset+5] = uint8(value) - value = clampUint16(rgba[3] / sum) - out.Pix[offset+6] = uint8(value >> 8) - out.Pix[offset+7] = uint8(value) - } - } -} - -func resizeRGBA(in *image.RGBA, out *image.RGBA, scale float64, coeffs []int16, offset []int, filterLength int) { - newBounds := out.Bounds() - maxX := in.Bounds().Dx() - 1 - - for x := newBounds.Min.X; x < newBounds.Max.X; x++ { - row := in.Pix[x*in.Stride:] - for y := newBounds.Min.Y; y < newBounds.Max.Y; y++ { - var rgba [4]int32 - var sum int32 - start := offset[y] - ci := y * filterLength - for i := 0; i < filterLength; i++ { - coeff := coeffs[ci+i] - if coeff != 0 { - xi := start + i - switch { - case uint(xi) < 
uint(maxX): - xi *= 4 - case xi >= maxX: - xi = 4 * maxX - default: - xi = 0 - } - - rgba[0] += int32(coeff) * int32(row[xi+0]) - rgba[1] += int32(coeff) * int32(row[xi+1]) - rgba[2] += int32(coeff) * int32(row[xi+2]) - rgba[3] += int32(coeff) * int32(row[xi+3]) - sum += int32(coeff) - } - } - - xo := (y-newBounds.Min.Y)*out.Stride + (x-newBounds.Min.X)*4 - - out.Pix[xo+0] = clampUint8(rgba[0] / sum) - out.Pix[xo+1] = clampUint8(rgba[1] / sum) - out.Pix[xo+2] = clampUint8(rgba[2] / sum) - out.Pix[xo+3] = clampUint8(rgba[3] / sum) - } - } -} - -func resizeNRGBA(in *image.NRGBA, out *image.RGBA, scale float64, coeffs []int16, offset []int, filterLength int) { - newBounds := out.Bounds() - maxX := in.Bounds().Dx() - 1 - - for x := newBounds.Min.X; x < newBounds.Max.X; x++ { - row := in.Pix[x*in.Stride:] - for y := newBounds.Min.Y; y < newBounds.Max.Y; y++ { - var rgba [4]int32 - var sum int32 - start := offset[y] - ci := y * filterLength - for i := 0; i < filterLength; i++ { - coeff := coeffs[ci+i] - if coeff != 0 { - xi := start + i - switch { - case uint(xi) < uint(maxX): - xi *= 4 - case xi >= maxX: - xi = 4 * maxX - default: - xi = 0 - } - - // Forward alpha-premultiplication - a := int32(row[xi+3]) - r := int32(row[xi+0]) * a - r /= 0xff - g := int32(row[xi+1]) * a - g /= 0xff - b := int32(row[xi+2]) * a - b /= 0xff - - rgba[0] += int32(coeff) * r - rgba[1] += int32(coeff) * g - rgba[2] += int32(coeff) * b - rgba[3] += int32(coeff) * a - sum += int32(coeff) - } - } - - xo := (y-newBounds.Min.Y)*out.Stride + (x-newBounds.Min.X)*4 - - out.Pix[xo+0] = clampUint8(rgba[0] / sum) - out.Pix[xo+1] = clampUint8(rgba[1] / sum) - out.Pix[xo+2] = clampUint8(rgba[2] / sum) - out.Pix[xo+3] = clampUint8(rgba[3] / sum) - } - } -} - -func resizeRGBA64(in *image.RGBA64, out *image.RGBA64, scale float64, coeffs []int32, offset []int, filterLength int) { - newBounds := out.Bounds() - maxX := in.Bounds().Dx() - 1 - - for x := newBounds.Min.X; x < newBounds.Max.X; x++ { - row := 
in.Pix[x*in.Stride:] - for y := newBounds.Min.Y; y < newBounds.Max.Y; y++ { - var rgba [4]int64 - var sum int64 - start := offset[y] - ci := y * filterLength - for i := 0; i < filterLength; i++ { - coeff := coeffs[ci+i] - if coeff != 0 { - xi := start + i - switch { - case uint(xi) < uint(maxX): - xi *= 8 - case xi >= maxX: - xi = 8 * maxX - default: - xi = 0 - } - - rgba[0] += int64(coeff) * (int64(row[xi+0])<<8 | int64(row[xi+1])) - rgba[1] += int64(coeff) * (int64(row[xi+2])<<8 | int64(row[xi+3])) - rgba[2] += int64(coeff) * (int64(row[xi+4])<<8 | int64(row[xi+5])) - rgba[3] += int64(coeff) * (int64(row[xi+6])<<8 | int64(row[xi+7])) - sum += int64(coeff) - } - } - - xo := (y-newBounds.Min.Y)*out.Stride + (x-newBounds.Min.X)*8 - - value := clampUint16(rgba[0] / sum) - out.Pix[xo+0] = uint8(value >> 8) - out.Pix[xo+1] = uint8(value) - value = clampUint16(rgba[1] / sum) - out.Pix[xo+2] = uint8(value >> 8) - out.Pix[xo+3] = uint8(value) - value = clampUint16(rgba[2] / sum) - out.Pix[xo+4] = uint8(value >> 8) - out.Pix[xo+5] = uint8(value) - value = clampUint16(rgba[3] / sum) - out.Pix[xo+6] = uint8(value >> 8) - out.Pix[xo+7] = uint8(value) - } - } -} - -func resizeNRGBA64(in *image.NRGBA64, out *image.RGBA64, scale float64, coeffs []int32, offset []int, filterLength int) { - newBounds := out.Bounds() - maxX := in.Bounds().Dx() - 1 - - for x := newBounds.Min.X; x < newBounds.Max.X; x++ { - row := in.Pix[x*in.Stride:] - for y := newBounds.Min.Y; y < newBounds.Max.Y; y++ { - var rgba [4]int64 - var sum int64 - start := offset[y] - ci := y * filterLength - for i := 0; i < filterLength; i++ { - coeff := coeffs[ci+i] - if coeff != 0 { - xi := start + i - switch { - case uint(xi) < uint(maxX): - xi *= 8 - case xi >= maxX: - xi = 8 * maxX - default: - xi = 0 - } - - // Forward alpha-premultiplication - a := int64(uint16(row[xi+6])<<8 | uint16(row[xi+7])) - r := int64(uint16(row[xi+0])<<8|uint16(row[xi+1])) * a - r /= 0xffff - g := 
int64(uint16(row[xi+2])<<8|uint16(row[xi+3])) * a - g /= 0xffff - b := int64(uint16(row[xi+4])<<8|uint16(row[xi+5])) * a - b /= 0xffff - - rgba[0] += int64(coeff) * r - rgba[1] += int64(coeff) * g - rgba[2] += int64(coeff) * b - rgba[3] += int64(coeff) * a - sum += int64(coeff) - } - } - - xo := (y-newBounds.Min.Y)*out.Stride + (x-newBounds.Min.X)*8 - - value := clampUint16(rgba[0] / sum) - out.Pix[xo+0] = uint8(value >> 8) - out.Pix[xo+1] = uint8(value) - value = clampUint16(rgba[1] / sum) - out.Pix[xo+2] = uint8(value >> 8) - out.Pix[xo+3] = uint8(value) - value = clampUint16(rgba[2] / sum) - out.Pix[xo+4] = uint8(value >> 8) - out.Pix[xo+5] = uint8(value) - value = clampUint16(rgba[3] / sum) - out.Pix[xo+6] = uint8(value >> 8) - out.Pix[xo+7] = uint8(value) - } - } -} - -func resizeGray(in *image.Gray, out *image.Gray, scale float64, coeffs []int16, offset []int, filterLength int) { - newBounds := out.Bounds() - maxX := in.Bounds().Dx() - 1 - - for x := newBounds.Min.X; x < newBounds.Max.X; x++ { - row := in.Pix[(x-newBounds.Min.X)*in.Stride:] - for y := newBounds.Min.Y; y < newBounds.Max.Y; y++ { - var gray int32 - var sum int32 - start := offset[y] - ci := y * filterLength - for i := 0; i < filterLength; i++ { - coeff := coeffs[ci+i] - if coeff != 0 { - xi := start + i - switch { - case xi < 0: - xi = 0 - case xi >= maxX: - xi = maxX - } - gray += int32(coeff) * int32(row[xi]) - sum += int32(coeff) - } - } - - offset := (y-newBounds.Min.Y)*out.Stride + (x - newBounds.Min.X) - out.Pix[offset] = clampUint8(gray / sum) - } - } -} - -func resizeGray16(in *image.Gray16, out *image.Gray16, scale float64, coeffs []int32, offset []int, filterLength int) { - newBounds := out.Bounds() - maxX := in.Bounds().Dx() - 1 - - for x := newBounds.Min.X; x < newBounds.Max.X; x++ { - row := in.Pix[x*in.Stride:] - for y := newBounds.Min.Y; y < newBounds.Max.Y; y++ { - var gray int64 - var sum int64 - start := offset[y] - ci := y * filterLength - for i := 0; i < filterLength; i++ { 
- coeff := coeffs[ci+i] - if coeff != 0 { - xi := start + i - switch { - case uint(xi) < uint(maxX): - xi *= 2 - case xi >= maxX: - xi = 2 * maxX - default: - xi = 0 - } - gray += int64(coeff) * int64(uint16(row[xi+0])<<8|uint16(row[xi+1])) - sum += int64(coeff) - } - } - - offset := (y-newBounds.Min.Y)*out.Stride + (x-newBounds.Min.X)*2 - value := clampUint16(gray / sum) - out.Pix[offset+0] = uint8(value >> 8) - out.Pix[offset+1] = uint8(value) - } - } -} - -func resizeYCbCr(in *ycc, out *ycc, scale float64, coeffs []int16, offset []int, filterLength int) { - newBounds := out.Bounds() - maxX := in.Bounds().Dx() - 1 - - for x := newBounds.Min.X; x < newBounds.Max.X; x++ { - row := in.Pix[x*in.Stride:] - for y := newBounds.Min.Y; y < newBounds.Max.Y; y++ { - var p [3]int32 - var sum int32 - start := offset[y] - ci := y * filterLength - for i := 0; i < filterLength; i++ { - coeff := coeffs[ci+i] - if coeff != 0 { - xi := start + i - switch { - case uint(xi) < uint(maxX): - xi *= 3 - case xi >= maxX: - xi = 3 * maxX - default: - xi = 0 - } - p[0] += int32(coeff) * int32(row[xi+0]) - p[1] += int32(coeff) * int32(row[xi+1]) - p[2] += int32(coeff) * int32(row[xi+2]) - sum += int32(coeff) - } - } - - xo := (y-newBounds.Min.Y)*out.Stride + (x-newBounds.Min.X)*3 - out.Pix[xo+0] = clampUint8(p[0] / sum) - out.Pix[xo+1] = clampUint8(p[1] / sum) - out.Pix[xo+2] = clampUint8(p[2] / sum) - } - } -} - -func nearestYCbCr(in *ycc, out *ycc, scale float64, coeffs []bool, offset []int, filterLength int) { - newBounds := out.Bounds() - maxX := in.Bounds().Dx() - 1 - - for x := newBounds.Min.X; x < newBounds.Max.X; x++ { - row := in.Pix[x*in.Stride:] - for y := newBounds.Min.Y; y < newBounds.Max.Y; y++ { - var p [3]float32 - var sum float32 - start := offset[y] - ci := y * filterLength - for i := 0; i < filterLength; i++ { - if coeffs[ci+i] { - xi := start + i - switch { - case uint(xi) < uint(maxX): - xi *= 3 - case xi >= maxX: - xi = 3 * maxX - default: - xi = 0 - } - p[0] += 
float32(row[xi+0]) - p[1] += float32(row[xi+1]) - p[2] += float32(row[xi+2]) - sum++ - } - } - - xo := (y-newBounds.Min.Y)*out.Stride + (x-newBounds.Min.X)*3 - out.Pix[xo+0] = floatToUint8(p[0] / sum) - out.Pix[xo+1] = floatToUint8(p[1] / sum) - out.Pix[xo+2] = floatToUint8(p[2] / sum) - } - } -} diff --git a/vendor/github.com/nfnt/resize/filters.go b/vendor/github.com/nfnt/resize/filters.go deleted file mode 100644 index 4ce04e38..00000000 --- a/vendor/github.com/nfnt/resize/filters.go +++ /dev/null @@ -1,143 +0,0 @@ -/* -Copyright (c) 2012, Jan Schlicht - -Permission to use, copy, modify, and/or distribute this software for any purpose -with or without fee is hereby granted, provided that the above copyright notice -and this permission notice appear in all copies. - -THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH -REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND -FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, -INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS -OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER -TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF -THIS SOFTWARE. 
-*/ - -package resize - -import ( - "math" -) - -func nearest(in float64) float64 { - if in >= -0.5 && in < 0.5 { - return 1 - } - return 0 -} - -func linear(in float64) float64 { - in = math.Abs(in) - if in <= 1 { - return 1 - in - } - return 0 -} - -func cubic(in float64) float64 { - in = math.Abs(in) - if in <= 1 { - return in*in*(1.5*in-2.5) + 1.0 - } - if in <= 2 { - return in*(in*(2.5-0.5*in)-4.0) + 2.0 - } - return 0 -} - -func mitchellnetravali(in float64) float64 { - in = math.Abs(in) - if in <= 1 { - return (7.0*in*in*in - 12.0*in*in + 5.33333333333) * 0.16666666666 - } - if in <= 2 { - return (-2.33333333333*in*in*in + 12.0*in*in - 20.0*in + 10.6666666667) * 0.16666666666 - } - return 0 -} - -func sinc(x float64) float64 { - x = math.Abs(x) * math.Pi - if x >= 1.220703e-4 { - return math.Sin(x) / x - } - return 1 -} - -func lanczos2(in float64) float64 { - if in > -2 && in < 2 { - return sinc(in) * sinc(in*0.5) - } - return 0 -} - -func lanczos3(in float64) float64 { - if in > -3 && in < 3 { - return sinc(in) * sinc(in*0.3333333333333333) - } - return 0 -} - -// range [-256,256] -func createWeights8(dy, filterLength int, blur, scale float64, kernel func(float64) float64) ([]int16, []int, int) { - filterLength = filterLength * int(math.Max(math.Ceil(blur*scale), 1)) - filterFactor := math.Min(1./(blur*scale), 1) - - coeffs := make([]int16, dy*filterLength) - start := make([]int, dy) - for y := 0; y < dy; y++ { - interpX := scale*(float64(y)+0.5) - 0.5 - start[y] = int(interpX) - filterLength/2 + 1 - interpX -= float64(start[y]) - for i := 0; i < filterLength; i++ { - in := (interpX - float64(i)) * filterFactor - coeffs[y*filterLength+i] = int16(kernel(in) * 256) - } - } - - return coeffs, start, filterLength -} - -// range [-65536,65536] -func createWeights16(dy, filterLength int, blur, scale float64, kernel func(float64) float64) ([]int32, []int, int) { - filterLength = filterLength * int(math.Max(math.Ceil(blur*scale), 1)) - filterFactor := 
math.Min(1./(blur*scale), 1) - - coeffs := make([]int32, dy*filterLength) - start := make([]int, dy) - for y := 0; y < dy; y++ { - interpX := scale*(float64(y)+0.5) - 0.5 - start[y] = int(interpX) - filterLength/2 + 1 - interpX -= float64(start[y]) - for i := 0; i < filterLength; i++ { - in := (interpX - float64(i)) * filterFactor - coeffs[y*filterLength+i] = int32(kernel(in) * 65536) - } - } - - return coeffs, start, filterLength -} - -func createWeightsNearest(dy, filterLength int, blur, scale float64) ([]bool, []int, int) { - filterLength = filterLength * int(math.Max(math.Ceil(blur*scale), 1)) - filterFactor := math.Min(1./(blur*scale), 1) - - coeffs := make([]bool, dy*filterLength) - start := make([]int, dy) - for y := 0; y < dy; y++ { - interpX := scale*(float64(y)+0.5) - 0.5 - start[y] = int(interpX) - filterLength/2 + 1 - interpX -= float64(start[y]) - for i := 0; i < filterLength; i++ { - in := (interpX - float64(i)) * filterFactor - if in >= -0.5 && in < 0.5 { - coeffs[y*filterLength+i] = true - } else { - coeffs[y*filterLength+i] = false - } - } - } - - return coeffs, start, filterLength -} diff --git a/vendor/github.com/nfnt/resize/nearest.go b/vendor/github.com/nfnt/resize/nearest.go deleted file mode 100644 index 888039d8..00000000 --- a/vendor/github.com/nfnt/resize/nearest.go +++ /dev/null @@ -1,318 +0,0 @@ -/* -Copyright (c) 2014, Charlie Vieth - -Permission to use, copy, modify, and/or distribute this software for any purpose -with or without fee is hereby granted, provided that the above copyright notice -and this permission notice appear in all copies. - -THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH -REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND -FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, -INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS -OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER -TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF -THIS SOFTWARE. -*/ - -package resize - -import "image" - -func floatToUint8(x float32) uint8 { - // Nearest-neighbor values are always - // positive no need to check lower-bound. - if x > 0xfe { - return 0xff - } - return uint8(x) -} - -func floatToUint16(x float32) uint16 { - if x > 0xfffe { - return 0xffff - } - return uint16(x) -} - -func nearestGeneric(in image.Image, out *image.RGBA64, scale float64, coeffs []bool, offset []int, filterLength int) { - newBounds := out.Bounds() - maxX := in.Bounds().Dx() - 1 - - for x := newBounds.Min.X; x < newBounds.Max.X; x++ { - for y := newBounds.Min.Y; y < newBounds.Max.Y; y++ { - var rgba [4]float32 - var sum float32 - start := offset[y] - ci := y * filterLength - for i := 0; i < filterLength; i++ { - if coeffs[ci+i] { - xi := start + i - switch { - case xi < 0: - xi = 0 - case xi >= maxX: - xi = maxX - } - r, g, b, a := in.At(xi+in.Bounds().Min.X, x+in.Bounds().Min.Y).RGBA() - rgba[0] += float32(r) - rgba[1] += float32(g) - rgba[2] += float32(b) - rgba[3] += float32(a) - sum++ - } - } - - offset := (y-newBounds.Min.Y)*out.Stride + (x-newBounds.Min.X)*8 - value := floatToUint16(rgba[0] / sum) - out.Pix[offset+0] = uint8(value >> 8) - out.Pix[offset+1] = uint8(value) - value = floatToUint16(rgba[1] / sum) - out.Pix[offset+2] = uint8(value >> 8) - out.Pix[offset+3] = uint8(value) - value = floatToUint16(rgba[2] / sum) - out.Pix[offset+4] = uint8(value >> 8) - out.Pix[offset+5] = uint8(value) - value = floatToUint16(rgba[3] / sum) - out.Pix[offset+6] = uint8(value >> 8) - out.Pix[offset+7] = uint8(value) - } - } -} - -func nearestRGBA(in *image.RGBA, out *image.RGBA, scale float64, coeffs []bool, offset []int, filterLength 
int) { - newBounds := out.Bounds() - maxX := in.Bounds().Dx() - 1 - - for x := newBounds.Min.X; x < newBounds.Max.X; x++ { - row := in.Pix[x*in.Stride:] - for y := newBounds.Min.Y; y < newBounds.Max.Y; y++ { - var rgba [4]float32 - var sum float32 - start := offset[y] - ci := y * filterLength - for i := 0; i < filterLength; i++ { - if coeffs[ci+i] { - xi := start + i - switch { - case uint(xi) < uint(maxX): - xi *= 4 - case xi >= maxX: - xi = 4 * maxX - default: - xi = 0 - } - rgba[0] += float32(row[xi+0]) - rgba[1] += float32(row[xi+1]) - rgba[2] += float32(row[xi+2]) - rgba[3] += float32(row[xi+3]) - sum++ - } - } - - xo := (y-newBounds.Min.Y)*out.Stride + (x-newBounds.Min.X)*4 - out.Pix[xo+0] = floatToUint8(rgba[0] / sum) - out.Pix[xo+1] = floatToUint8(rgba[1] / sum) - out.Pix[xo+2] = floatToUint8(rgba[2] / sum) - out.Pix[xo+3] = floatToUint8(rgba[3] / sum) - } - } -} - -func nearestNRGBA(in *image.NRGBA, out *image.NRGBA, scale float64, coeffs []bool, offset []int, filterLength int) { - newBounds := out.Bounds() - maxX := in.Bounds().Dx() - 1 - - for x := newBounds.Min.X; x < newBounds.Max.X; x++ { - row := in.Pix[x*in.Stride:] - for y := newBounds.Min.Y; y < newBounds.Max.Y; y++ { - var rgba [4]float32 - var sum float32 - start := offset[y] - ci := y * filterLength - for i := 0; i < filterLength; i++ { - if coeffs[ci+i] { - xi := start + i - switch { - case uint(xi) < uint(maxX): - xi *= 4 - case xi >= maxX: - xi = 4 * maxX - default: - xi = 0 - } - rgba[0] += float32(row[xi+0]) - rgba[1] += float32(row[xi+1]) - rgba[2] += float32(row[xi+2]) - rgba[3] += float32(row[xi+3]) - sum++ - } - } - - xo := (y-newBounds.Min.Y)*out.Stride + (x-newBounds.Min.X)*4 - out.Pix[xo+0] = floatToUint8(rgba[0] / sum) - out.Pix[xo+1] = floatToUint8(rgba[1] / sum) - out.Pix[xo+2] = floatToUint8(rgba[2] / sum) - out.Pix[xo+3] = floatToUint8(rgba[3] / sum) - } - } -} - -func nearestRGBA64(in *image.RGBA64, out *image.RGBA64, scale float64, coeffs []bool, offset []int, filterLength 
int) { - newBounds := out.Bounds() - maxX := in.Bounds().Dx() - 1 - - for x := newBounds.Min.X; x < newBounds.Max.X; x++ { - row := in.Pix[x*in.Stride:] - for y := newBounds.Min.Y; y < newBounds.Max.Y; y++ { - var rgba [4]float32 - var sum float32 - start := offset[y] - ci := y * filterLength - for i := 0; i < filterLength; i++ { - if coeffs[ci+i] { - xi := start + i - switch { - case uint(xi) < uint(maxX): - xi *= 8 - case xi >= maxX: - xi = 8 * maxX - default: - xi = 0 - } - rgba[0] += float32(uint16(row[xi+0])<<8 | uint16(row[xi+1])) - rgba[1] += float32(uint16(row[xi+2])<<8 | uint16(row[xi+3])) - rgba[2] += float32(uint16(row[xi+4])<<8 | uint16(row[xi+5])) - rgba[3] += float32(uint16(row[xi+6])<<8 | uint16(row[xi+7])) - sum++ - } - } - - xo := (y-newBounds.Min.Y)*out.Stride + (x-newBounds.Min.X)*8 - value := floatToUint16(rgba[0] / sum) - out.Pix[xo+0] = uint8(value >> 8) - out.Pix[xo+1] = uint8(value) - value = floatToUint16(rgba[1] / sum) - out.Pix[xo+2] = uint8(value >> 8) - out.Pix[xo+3] = uint8(value) - value = floatToUint16(rgba[2] / sum) - out.Pix[xo+4] = uint8(value >> 8) - out.Pix[xo+5] = uint8(value) - value = floatToUint16(rgba[3] / sum) - out.Pix[xo+6] = uint8(value >> 8) - out.Pix[xo+7] = uint8(value) - } - } -} - -func nearestNRGBA64(in *image.NRGBA64, out *image.NRGBA64, scale float64, coeffs []bool, offset []int, filterLength int) { - newBounds := out.Bounds() - maxX := in.Bounds().Dx() - 1 - - for x := newBounds.Min.X; x < newBounds.Max.X; x++ { - row := in.Pix[x*in.Stride:] - for y := newBounds.Min.Y; y < newBounds.Max.Y; y++ { - var rgba [4]float32 - var sum float32 - start := offset[y] - ci := y * filterLength - for i := 0; i < filterLength; i++ { - if coeffs[ci+i] { - xi := start + i - switch { - case uint(xi) < uint(maxX): - xi *= 8 - case xi >= maxX: - xi = 8 * maxX - default: - xi = 0 - } - rgba[0] += float32(uint16(row[xi+0])<<8 | uint16(row[xi+1])) - rgba[1] += float32(uint16(row[xi+2])<<8 | uint16(row[xi+3])) - rgba[2] += 
float32(uint16(row[xi+4])<<8 | uint16(row[xi+5])) - rgba[3] += float32(uint16(row[xi+6])<<8 | uint16(row[xi+7])) - sum++ - } - } - - xo := (y-newBounds.Min.Y)*out.Stride + (x-newBounds.Min.X)*8 - value := floatToUint16(rgba[0] / sum) - out.Pix[xo+0] = uint8(value >> 8) - out.Pix[xo+1] = uint8(value) - value = floatToUint16(rgba[1] / sum) - out.Pix[xo+2] = uint8(value >> 8) - out.Pix[xo+3] = uint8(value) - value = floatToUint16(rgba[2] / sum) - out.Pix[xo+4] = uint8(value >> 8) - out.Pix[xo+5] = uint8(value) - value = floatToUint16(rgba[3] / sum) - out.Pix[xo+6] = uint8(value >> 8) - out.Pix[xo+7] = uint8(value) - } - } -} - -func nearestGray(in *image.Gray, out *image.Gray, scale float64, coeffs []bool, offset []int, filterLength int) { - newBounds := out.Bounds() - maxX := in.Bounds().Dx() - 1 - - for x := newBounds.Min.X; x < newBounds.Max.X; x++ { - row := in.Pix[x*in.Stride:] - for y := newBounds.Min.Y; y < newBounds.Max.Y; y++ { - var gray float32 - var sum float32 - start := offset[y] - ci := y * filterLength - for i := 0; i < filterLength; i++ { - if coeffs[ci+i] { - xi := start + i - switch { - case xi < 0: - xi = 0 - case xi >= maxX: - xi = maxX - } - gray += float32(row[xi]) - sum++ - } - } - - offset := (y-newBounds.Min.Y)*out.Stride + (x - newBounds.Min.X) - out.Pix[offset] = floatToUint8(gray / sum) - } - } -} - -func nearestGray16(in *image.Gray16, out *image.Gray16, scale float64, coeffs []bool, offset []int, filterLength int) { - newBounds := out.Bounds() - maxX := in.Bounds().Dx() - 1 - - for x := newBounds.Min.X; x < newBounds.Max.X; x++ { - row := in.Pix[x*in.Stride:] - for y := newBounds.Min.Y; y < newBounds.Max.Y; y++ { - var gray float32 - var sum float32 - start := offset[y] - ci := y * filterLength - for i := 0; i < filterLength; i++ { - if coeffs[ci+i] { - xi := start + i - switch { - case uint(xi) < uint(maxX): - xi *= 2 - case xi >= maxX: - xi = 2 * maxX - default: - xi = 0 - } - gray += float32(uint16(row[xi+0])<<8 | uint16(row[xi+1])) - 
sum++ - } - } - - offset := (y-newBounds.Min.Y)*out.Stride + (x-newBounds.Min.X)*2 - value := floatToUint16(gray / sum) - out.Pix[offset+0] = uint8(value >> 8) - out.Pix[offset+1] = uint8(value) - } - } -} diff --git a/vendor/github.com/nfnt/resize/resize.go b/vendor/github.com/nfnt/resize/resize.go deleted file mode 100644 index 57bd1fcd..00000000 --- a/vendor/github.com/nfnt/resize/resize.go +++ /dev/null @@ -1,614 +0,0 @@ -/* -Copyright (c) 2012, Jan Schlicht - -Permission to use, copy, modify, and/or distribute this software for any purpose -with or without fee is hereby granted, provided that the above copyright notice -and this permission notice appear in all copies. - -THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH -REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND -FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, -INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS -OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER -TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF -THIS SOFTWARE. -*/ - -// Package resize implements various image resizing methods. -// -// The package works with the Image interface described in the image package. -// Various interpolation methods are provided and multiple processors may be -// utilized in the computations. -// -// Example: -// imgResized := resize.Resize(1000, 0, imgOld, resize.MitchellNetravali) -package resize - -import ( - "image" - "runtime" - "sync" -) - -// An InterpolationFunction provides the parameters that describe an -// interpolation kernel. It returns the number of samples to take -// and the kernel function to use for sampling. 
-type InterpolationFunction int - -// InterpolationFunction constants -const ( - // Nearest-neighbor interpolation - NearestNeighbor InterpolationFunction = iota - // Bilinear interpolation - Bilinear - // Bicubic interpolation (with cubic hermite spline) - Bicubic - // Mitchell-Netravali interpolation - MitchellNetravali - // Lanczos interpolation (a=2) - Lanczos2 - // Lanczos interpolation (a=3) - Lanczos3 -) - -// kernal, returns an InterpolationFunctions taps and kernel. -func (i InterpolationFunction) kernel() (int, func(float64) float64) { - switch i { - case Bilinear: - return 2, linear - case Bicubic: - return 4, cubic - case MitchellNetravali: - return 4, mitchellnetravali - case Lanczos2: - return 4, lanczos2 - case Lanczos3: - return 6, lanczos3 - default: - // Default to NearestNeighbor. - return 2, nearest - } -} - -// values <1 will sharpen the image -var blur = 1.0 - -// Resize scales an image to new width and height using the interpolation function interp. -// A new image with the given dimensions will be returned. -// If one of the parameters width or height is set to 0, its size will be calculated so that -// the aspect ratio is that of the originating image. -// The resizing algorithm uses channels for parallel computation. 
-func Resize(width, height uint, img image.Image, interp InterpolationFunction) image.Image { - scaleX, scaleY := calcFactors(width, height, float64(img.Bounds().Dx()), float64(img.Bounds().Dy())) - if width == 0 { - width = uint(0.7 + float64(img.Bounds().Dx())/scaleX) - } - if height == 0 { - height = uint(0.7 + float64(img.Bounds().Dy())/scaleY) - } - - // Trivial case: return input image - if int(width) == img.Bounds().Dx() && int(height) == img.Bounds().Dy() { - return img - } - - if interp == NearestNeighbor { - return resizeNearest(width, height, scaleX, scaleY, img, interp) - } - - taps, kernel := interp.kernel() - cpus := runtime.GOMAXPROCS(0) - wg := sync.WaitGroup{} - - // Generic access to image.Image is slow in tight loops. - // The optimal access has to be determined from the concrete image type. - switch input := img.(type) { - case *image.RGBA: - // 8-bit precision - temp := image.NewRGBA(image.Rect(0, 0, input.Bounds().Dy(), int(width))) - result := image.NewRGBA(image.Rect(0, 0, int(width), int(height))) - - // horizontal filter, results in transposed temporary image - coeffs, offset, filterLength := createWeights8(temp.Bounds().Dy(), taps, blur, scaleX, kernel) - wg.Add(cpus) - for i := 0; i < cpus; i++ { - slice := makeSlice(temp, i, cpus).(*image.RGBA) - go func() { - defer wg.Done() - resizeRGBA(input, slice, scaleX, coeffs, offset, filterLength) - }() - } - wg.Wait() - - // horizontal filter on transposed image, result is not transposed - coeffs, offset, filterLength = createWeights8(result.Bounds().Dy(), taps, blur, scaleY, kernel) - wg.Add(cpus) - for i := 0; i < cpus; i++ { - slice := makeSlice(result, i, cpus).(*image.RGBA) - go func() { - defer wg.Done() - resizeRGBA(temp, slice, scaleY, coeffs, offset, filterLength) - }() - } - wg.Wait() - return result - case *image.NRGBA: - // 8-bit precision - temp := image.NewRGBA(image.Rect(0, 0, input.Bounds().Dy(), int(width))) - result := image.NewRGBA(image.Rect(0, 0, int(width), int(height))) 
- - // horizontal filter, results in transposed temporary image - coeffs, offset, filterLength := createWeights8(temp.Bounds().Dy(), taps, blur, scaleX, kernel) - wg.Add(cpus) - for i := 0; i < cpus; i++ { - slice := makeSlice(temp, i, cpus).(*image.RGBA) - go func() { - defer wg.Done() - resizeNRGBA(input, slice, scaleX, coeffs, offset, filterLength) - }() - } - wg.Wait() - - // horizontal filter on transposed image, result is not transposed - coeffs, offset, filterLength = createWeights8(result.Bounds().Dy(), taps, blur, scaleY, kernel) - wg.Add(cpus) - for i := 0; i < cpus; i++ { - slice := makeSlice(result, i, cpus).(*image.RGBA) - go func() { - defer wg.Done() - resizeRGBA(temp, slice, scaleY, coeffs, offset, filterLength) - }() - } - wg.Wait() - return result - - case *image.YCbCr: - // 8-bit precision - // accessing the YCbCr arrays in a tight loop is slow. - // converting the image to ycc increases performance by 2x. - temp := newYCC(image.Rect(0, 0, input.Bounds().Dy(), int(width)), input.SubsampleRatio) - result := newYCC(image.Rect(0, 0, int(width), int(height)), image.YCbCrSubsampleRatio444) - - coeffs, offset, filterLength := createWeights8(temp.Bounds().Dy(), taps, blur, scaleX, kernel) - in := imageYCbCrToYCC(input) - wg.Add(cpus) - for i := 0; i < cpus; i++ { - slice := makeSlice(temp, i, cpus).(*ycc) - go func() { - defer wg.Done() - resizeYCbCr(in, slice, scaleX, coeffs, offset, filterLength) - }() - } - wg.Wait() - - coeffs, offset, filterLength = createWeights8(result.Bounds().Dy(), taps, blur, scaleY, kernel) - wg.Add(cpus) - for i := 0; i < cpus; i++ { - slice := makeSlice(result, i, cpus).(*ycc) - go func() { - defer wg.Done() - resizeYCbCr(temp, slice, scaleY, coeffs, offset, filterLength) - }() - } - wg.Wait() - return result.YCbCr() - case *image.RGBA64: - // 16-bit precision - temp := image.NewRGBA64(image.Rect(0, 0, input.Bounds().Dy(), int(width))) - result := image.NewRGBA64(image.Rect(0, 0, int(width), int(height))) - - // horizontal 
filter, results in transposed temporary image - coeffs, offset, filterLength := createWeights16(temp.Bounds().Dy(), taps, blur, scaleX, kernel) - wg.Add(cpus) - for i := 0; i < cpus; i++ { - slice := makeSlice(temp, i, cpus).(*image.RGBA64) - go func() { - defer wg.Done() - resizeRGBA64(input, slice, scaleX, coeffs, offset, filterLength) - }() - } - wg.Wait() - - // horizontal filter on transposed image, result is not transposed - coeffs, offset, filterLength = createWeights16(result.Bounds().Dy(), taps, blur, scaleY, kernel) - wg.Add(cpus) - for i := 0; i < cpus; i++ { - slice := makeSlice(result, i, cpus).(*image.RGBA64) - go func() { - defer wg.Done() - resizeRGBA64(temp, slice, scaleY, coeffs, offset, filterLength) - }() - } - wg.Wait() - return result - case *image.NRGBA64: - // 16-bit precision - temp := image.NewRGBA64(image.Rect(0, 0, input.Bounds().Dy(), int(width))) - result := image.NewRGBA64(image.Rect(0, 0, int(width), int(height))) - - // horizontal filter, results in transposed temporary image - coeffs, offset, filterLength := createWeights16(temp.Bounds().Dy(), taps, blur, scaleX, kernel) - wg.Add(cpus) - for i := 0; i < cpus; i++ { - slice := makeSlice(temp, i, cpus).(*image.RGBA64) - go func() { - defer wg.Done() - resizeNRGBA64(input, slice, scaleX, coeffs, offset, filterLength) - }() - } - wg.Wait() - - // horizontal filter on transposed image, result is not transposed - coeffs, offset, filterLength = createWeights16(result.Bounds().Dy(), taps, blur, scaleY, kernel) - wg.Add(cpus) - for i := 0; i < cpus; i++ { - slice := makeSlice(result, i, cpus).(*image.RGBA64) - go func() { - defer wg.Done() - resizeRGBA64(temp, slice, scaleY, coeffs, offset, filterLength) - }() - } - wg.Wait() - return result - case *image.Gray: - // 8-bit precision - temp := image.NewGray(image.Rect(0, 0, input.Bounds().Dy(), int(width))) - result := image.NewGray(image.Rect(0, 0, int(width), int(height))) - - // horizontal filter, results in transposed temporary image - 
coeffs, offset, filterLength := createWeights8(temp.Bounds().Dy(), taps, blur, scaleX, kernel) - wg.Add(cpus) - for i := 0; i < cpus; i++ { - slice := makeSlice(temp, i, cpus).(*image.Gray) - go func() { - defer wg.Done() - resizeGray(input, slice, scaleX, coeffs, offset, filterLength) - }() - } - wg.Wait() - - // horizontal filter on transposed image, result is not transposed - coeffs, offset, filterLength = createWeights8(result.Bounds().Dy(), taps, blur, scaleY, kernel) - wg.Add(cpus) - for i := 0; i < cpus; i++ { - slice := makeSlice(result, i, cpus).(*image.Gray) - go func() { - defer wg.Done() - resizeGray(temp, slice, scaleY, coeffs, offset, filterLength) - }() - } - wg.Wait() - return result - case *image.Gray16: - // 16-bit precision - temp := image.NewGray16(image.Rect(0, 0, input.Bounds().Dy(), int(width))) - result := image.NewGray16(image.Rect(0, 0, int(width), int(height))) - - // horizontal filter, results in transposed temporary image - coeffs, offset, filterLength := createWeights16(temp.Bounds().Dy(), taps, blur, scaleX, kernel) - wg.Add(cpus) - for i := 0; i < cpus; i++ { - slice := makeSlice(temp, i, cpus).(*image.Gray16) - go func() { - defer wg.Done() - resizeGray16(input, slice, scaleX, coeffs, offset, filterLength) - }() - } - wg.Wait() - - // horizontal filter on transposed image, result is not transposed - coeffs, offset, filterLength = createWeights16(result.Bounds().Dy(), taps, blur, scaleY, kernel) - wg.Add(cpus) - for i := 0; i < cpus; i++ { - slice := makeSlice(result, i, cpus).(*image.Gray16) - go func() { - defer wg.Done() - resizeGray16(temp, slice, scaleY, coeffs, offset, filterLength) - }() - } - wg.Wait() - return result - default: - // 16-bit precision - temp := image.NewRGBA64(image.Rect(0, 0, img.Bounds().Dy(), int(width))) - result := image.NewRGBA64(image.Rect(0, 0, int(width), int(height))) - - // horizontal filter, results in transposed temporary image - coeffs, offset, filterLength := createWeights16(temp.Bounds().Dy(), 
taps, blur, scaleX, kernel) - wg.Add(cpus) - for i := 0; i < cpus; i++ { - slice := makeSlice(temp, i, cpus).(*image.RGBA64) - go func() { - defer wg.Done() - resizeGeneric(img, slice, scaleX, coeffs, offset, filterLength) - }() - } - wg.Wait() - - // horizontal filter on transposed image, result is not transposed - coeffs, offset, filterLength = createWeights16(result.Bounds().Dy(), taps, blur, scaleY, kernel) - wg.Add(cpus) - for i := 0; i < cpus; i++ { - slice := makeSlice(result, i, cpus).(*image.RGBA64) - go func() { - defer wg.Done() - resizeRGBA64(temp, slice, scaleY, coeffs, offset, filterLength) - }() - } - wg.Wait() - return result - } -} - -func resizeNearest(width, height uint, scaleX, scaleY float64, img image.Image, interp InterpolationFunction) image.Image { - taps, _ := interp.kernel() - cpus := runtime.GOMAXPROCS(0) - wg := sync.WaitGroup{} - - switch input := img.(type) { - case *image.RGBA: - // 8-bit precision - temp := image.NewRGBA(image.Rect(0, 0, input.Bounds().Dy(), int(width))) - result := image.NewRGBA(image.Rect(0, 0, int(width), int(height))) - - // horizontal filter, results in transposed temporary image - coeffs, offset, filterLength := createWeightsNearest(temp.Bounds().Dy(), taps, blur, scaleX) - wg.Add(cpus) - for i := 0; i < cpus; i++ { - slice := makeSlice(temp, i, cpus).(*image.RGBA) - go func() { - defer wg.Done() - nearestRGBA(input, slice, scaleX, coeffs, offset, filterLength) - }() - } - wg.Wait() - - // horizontal filter on transposed image, result is not transposed - coeffs, offset, filterLength = createWeightsNearest(result.Bounds().Dy(), taps, blur, scaleY) - wg.Add(cpus) - for i := 0; i < cpus; i++ { - slice := makeSlice(result, i, cpus).(*image.RGBA) - go func() { - defer wg.Done() - nearestRGBA(temp, slice, scaleY, coeffs, offset, filterLength) - }() - } - wg.Wait() - return result - case *image.NRGBA: - // 8-bit precision - temp := image.NewNRGBA(image.Rect(0, 0, input.Bounds().Dy(), int(width))) - result := 
image.NewNRGBA(image.Rect(0, 0, int(width), int(height))) - - // horizontal filter, results in transposed temporary image - coeffs, offset, filterLength := createWeightsNearest(temp.Bounds().Dy(), taps, blur, scaleX) - wg.Add(cpus) - for i := 0; i < cpus; i++ { - slice := makeSlice(temp, i, cpus).(*image.NRGBA) - go func() { - defer wg.Done() - nearestNRGBA(input, slice, scaleX, coeffs, offset, filterLength) - }() - } - wg.Wait() - - // horizontal filter on transposed image, result is not transposed - coeffs, offset, filterLength = createWeightsNearest(result.Bounds().Dy(), taps, blur, scaleY) - wg.Add(cpus) - for i := 0; i < cpus; i++ { - slice := makeSlice(result, i, cpus).(*image.NRGBA) - go func() { - defer wg.Done() - nearestNRGBA(temp, slice, scaleY, coeffs, offset, filterLength) - }() - } - wg.Wait() - return result - case *image.YCbCr: - // 8-bit precision - // accessing the YCbCr arrays in a tight loop is slow. - // converting the image to ycc increases performance by 2x. - temp := newYCC(image.Rect(0, 0, input.Bounds().Dy(), int(width)), input.SubsampleRatio) - result := newYCC(image.Rect(0, 0, int(width), int(height)), image.YCbCrSubsampleRatio444) - - coeffs, offset, filterLength := createWeightsNearest(temp.Bounds().Dy(), taps, blur, scaleX) - in := imageYCbCrToYCC(input) - wg.Add(cpus) - for i := 0; i < cpus; i++ { - slice := makeSlice(temp, i, cpus).(*ycc) - go func() { - defer wg.Done() - nearestYCbCr(in, slice, scaleX, coeffs, offset, filterLength) - }() - } - wg.Wait() - - coeffs, offset, filterLength = createWeightsNearest(result.Bounds().Dy(), taps, blur, scaleY) - wg.Add(cpus) - for i := 0; i < cpus; i++ { - slice := makeSlice(result, i, cpus).(*ycc) - go func() { - defer wg.Done() - nearestYCbCr(temp, slice, scaleY, coeffs, offset, filterLength) - }() - } - wg.Wait() - return result.YCbCr() - case *image.RGBA64: - // 16-bit precision - temp := image.NewRGBA64(image.Rect(0, 0, input.Bounds().Dy(), int(width))) - result := 
image.NewRGBA64(image.Rect(0, 0, int(width), int(height))) - - // horizontal filter, results in transposed temporary image - coeffs, offset, filterLength := createWeightsNearest(temp.Bounds().Dy(), taps, blur, scaleX) - wg.Add(cpus) - for i := 0; i < cpus; i++ { - slice := makeSlice(temp, i, cpus).(*image.RGBA64) - go func() { - defer wg.Done() - nearestRGBA64(input, slice, scaleX, coeffs, offset, filterLength) - }() - } - wg.Wait() - - // horizontal filter on transposed image, result is not transposed - coeffs, offset, filterLength = createWeightsNearest(result.Bounds().Dy(), taps, blur, scaleY) - wg.Add(cpus) - for i := 0; i < cpus; i++ { - slice := makeSlice(result, i, cpus).(*image.RGBA64) - go func() { - defer wg.Done() - nearestRGBA64(temp, slice, scaleY, coeffs, offset, filterLength) - }() - } - wg.Wait() - return result - case *image.NRGBA64: - // 16-bit precision - temp := image.NewNRGBA64(image.Rect(0, 0, input.Bounds().Dy(), int(width))) - result := image.NewNRGBA64(image.Rect(0, 0, int(width), int(height))) - - // horizontal filter, results in transposed temporary image - coeffs, offset, filterLength := createWeightsNearest(temp.Bounds().Dy(), taps, blur, scaleX) - wg.Add(cpus) - for i := 0; i < cpus; i++ { - slice := makeSlice(temp, i, cpus).(*image.NRGBA64) - go func() { - defer wg.Done() - nearestNRGBA64(input, slice, scaleX, coeffs, offset, filterLength) - }() - } - wg.Wait() - - // horizontal filter on transposed image, result is not transposed - coeffs, offset, filterLength = createWeightsNearest(result.Bounds().Dy(), taps, blur, scaleY) - wg.Add(cpus) - for i := 0; i < cpus; i++ { - slice := makeSlice(result, i, cpus).(*image.NRGBA64) - go func() { - defer wg.Done() - nearestNRGBA64(temp, slice, scaleY, coeffs, offset, filterLength) - }() - } - wg.Wait() - return result - case *image.Gray: - // 8-bit precision - temp := image.NewGray(image.Rect(0, 0, input.Bounds().Dy(), int(width))) - result := image.NewGray(image.Rect(0, 0, int(width), 
int(height))) - - // horizontal filter, results in transposed temporary image - coeffs, offset, filterLength := createWeightsNearest(temp.Bounds().Dy(), taps, blur, scaleX) - wg.Add(cpus) - for i := 0; i < cpus; i++ { - slice := makeSlice(temp, i, cpus).(*image.Gray) - go func() { - defer wg.Done() - nearestGray(input, slice, scaleX, coeffs, offset, filterLength) - }() - } - wg.Wait() - - // horizontal filter on transposed image, result is not transposed - coeffs, offset, filterLength = createWeightsNearest(result.Bounds().Dy(), taps, blur, scaleY) - wg.Add(cpus) - for i := 0; i < cpus; i++ { - slice := makeSlice(result, i, cpus).(*image.Gray) - go func() { - defer wg.Done() - nearestGray(temp, slice, scaleY, coeffs, offset, filterLength) - }() - } - wg.Wait() - return result - case *image.Gray16: - // 16-bit precision - temp := image.NewGray16(image.Rect(0, 0, input.Bounds().Dy(), int(width))) - result := image.NewGray16(image.Rect(0, 0, int(width), int(height))) - - // horizontal filter, results in transposed temporary image - coeffs, offset, filterLength := createWeightsNearest(temp.Bounds().Dy(), taps, blur, scaleX) - wg.Add(cpus) - for i := 0; i < cpus; i++ { - slice := makeSlice(temp, i, cpus).(*image.Gray16) - go func() { - defer wg.Done() - nearestGray16(input, slice, scaleX, coeffs, offset, filterLength) - }() - } - wg.Wait() - - // horizontal filter on transposed image, result is not transposed - coeffs, offset, filterLength = createWeightsNearest(result.Bounds().Dy(), taps, blur, scaleY) - wg.Add(cpus) - for i := 0; i < cpus; i++ { - slice := makeSlice(result, i, cpus).(*image.Gray16) - go func() { - defer wg.Done() - nearestGray16(temp, slice, scaleY, coeffs, offset, filterLength) - }() - } - wg.Wait() - return result - default: - // 16-bit precision - temp := image.NewRGBA64(image.Rect(0, 0, img.Bounds().Dy(), int(width))) - result := image.NewRGBA64(image.Rect(0, 0, int(width), int(height))) - - // horizontal filter, results in transposed temporary 
image - coeffs, offset, filterLength := createWeightsNearest(temp.Bounds().Dy(), taps, blur, scaleX) - wg.Add(cpus) - for i := 0; i < cpus; i++ { - slice := makeSlice(temp, i, cpus).(*image.RGBA64) - go func() { - defer wg.Done() - nearestGeneric(img, slice, scaleX, coeffs, offset, filterLength) - }() - } - wg.Wait() - - // horizontal filter on transposed image, result is not transposed - coeffs, offset, filterLength = createWeightsNearest(result.Bounds().Dy(), taps, blur, scaleY) - wg.Add(cpus) - for i := 0; i < cpus; i++ { - slice := makeSlice(result, i, cpus).(*image.RGBA64) - go func() { - defer wg.Done() - nearestRGBA64(temp, slice, scaleY, coeffs, offset, filterLength) - }() - } - wg.Wait() - return result - } - -} - -// Calculates scaling factors using old and new image dimensions. -func calcFactors(width, height uint, oldWidth, oldHeight float64) (scaleX, scaleY float64) { - if width == 0 { - if height == 0 { - scaleX = 1.0 - scaleY = 1.0 - } else { - scaleY = oldHeight / float64(height) - scaleX = scaleY - } - } else { - scaleX = oldWidth / float64(width) - if height == 0 { - scaleY = scaleX - } else { - scaleY = oldHeight / float64(height) - } - } - return -} - -type imageWithSubImage interface { - image.Image - SubImage(image.Rectangle) image.Image -} - -func makeSlice(img imageWithSubImage, i, n int) image.Image { - return img.SubImage(image.Rect(img.Bounds().Min.X, img.Bounds().Min.Y+i*img.Bounds().Dy()/n, img.Bounds().Max.X, img.Bounds().Min.Y+(i+1)*img.Bounds().Dy()/n)) -} diff --git a/vendor/github.com/nfnt/resize/thumbnail.go b/vendor/github.com/nfnt/resize/thumbnail.go deleted file mode 100644 index 9efc246b..00000000 --- a/vendor/github.com/nfnt/resize/thumbnail.go +++ /dev/null @@ -1,55 +0,0 @@ -/* -Copyright (c) 2012, Jan Schlicht - -Permission to use, copy, modify, and/or distribute this software for any purpose -with or without fee is hereby granted, provided that the above copyright notice -and this permission notice appear in all copies. 
- -THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH -REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND -FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, -INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS -OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER -TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF -THIS SOFTWARE. -*/ - -package resize - -import ( - "image" -) - -// Thumbnail will downscale provided image to max width and height preserving -// original aspect ratio and using the interpolation function interp. -// It will return original image, without processing it, if original sizes -// are already smaller than provided constraints. -func Thumbnail(maxWidth, maxHeight uint, img image.Image, interp InterpolationFunction) image.Image { - origBounds := img.Bounds() - origWidth := uint(origBounds.Dx()) - origHeight := uint(origBounds.Dy()) - newWidth, newHeight := origWidth, origHeight - - // Return original image if it have same or smaller size as constraints - if maxWidth >= origWidth && maxHeight >= origHeight { - return img - } - - // Preserve aspect ratio - if origWidth > maxWidth { - newHeight = uint(origHeight * maxWidth / origWidth) - if newHeight < 1 { - newHeight = 1 - } - newWidth = maxWidth - } - - if newHeight > maxHeight { - newWidth = uint(newWidth * maxHeight / newHeight) - if newWidth < 1 { - newWidth = 1 - } - newHeight = maxHeight - } - return Resize(newWidth, newHeight, img, interp) -} diff --git a/vendor/github.com/nfnt/resize/ycc.go b/vendor/github.com/nfnt/resize/ycc.go deleted file mode 100644 index 10415995..00000000 --- a/vendor/github.com/nfnt/resize/ycc.go +++ /dev/null @@ -1,227 +0,0 @@ -/* -Copyright (c) 2014, Charlie Vieth - -Permission to use, copy, modify, and/or distribute this software for any purpose -with or without fee is hereby granted, provided that the 
above copyright notice -and this permission notice appear in all copies. - -THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH -REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND -FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, -INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS -OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER -TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF -THIS SOFTWARE. -*/ - -package resize - -import ( - "image" - "image/color" -) - -// ycc is an in memory YCbCr image. The Y, Cb and Cr samples are held in a -// single slice to increase resizing performance. -type ycc struct { - // Pix holds the image's pixels, in Y, Cb, Cr order. The pixel at - // (x, y) starts at Pix[(y-Rect.Min.Y)*Stride + (x-Rect.Min.X)*3]. - Pix []uint8 - // Stride is the Pix stride (in bytes) between vertically adjacent pixels. - Stride int - // Rect is the image's bounds. - Rect image.Rectangle - // SubsampleRatio is the subsample ratio of the original YCbCr image. - SubsampleRatio image.YCbCrSubsampleRatio -} - -// PixOffset returns the index of the first element of Pix that corresponds to -// the pixel at (x, y). -func (p *ycc) PixOffset(x, y int) int { - return (y-p.Rect.Min.Y)*p.Stride + (x-p.Rect.Min.X)*3 -} - -func (p *ycc) Bounds() image.Rectangle { - return p.Rect -} - -func (p *ycc) ColorModel() color.Model { - return color.YCbCrModel -} - -func (p *ycc) At(x, y int) color.Color { - if !(image.Point{x, y}.In(p.Rect)) { - return color.YCbCr{} - } - i := p.PixOffset(x, y) - return color.YCbCr{ - p.Pix[i+0], - p.Pix[i+1], - p.Pix[i+2], - } -} - -func (p *ycc) Opaque() bool { - return true -} - -// SubImage returns an image representing the portion of the image p visible -// through r. The returned value shares pixels with the original image. 
-func (p *ycc) SubImage(r image.Rectangle) image.Image { - r = r.Intersect(p.Rect) - if r.Empty() { - return &ycc{SubsampleRatio: p.SubsampleRatio} - } - i := p.PixOffset(r.Min.X, r.Min.Y) - return &ycc{ - Pix: p.Pix[i:], - Stride: p.Stride, - Rect: r, - SubsampleRatio: p.SubsampleRatio, - } -} - -// newYCC returns a new ycc with the given bounds and subsample ratio. -func newYCC(r image.Rectangle, s image.YCbCrSubsampleRatio) *ycc { - w, h := r.Dx(), r.Dy() - buf := make([]uint8, 3*w*h) - return &ycc{Pix: buf, Stride: 3 * w, Rect: r, SubsampleRatio: s} -} - -// YCbCr converts ycc to a YCbCr image with the same subsample ratio -// as the YCbCr image that ycc was generated from. -func (p *ycc) YCbCr() *image.YCbCr { - ycbcr := image.NewYCbCr(p.Rect, p.SubsampleRatio) - var off int - - switch ycbcr.SubsampleRatio { - case image.YCbCrSubsampleRatio422: - for y := ycbcr.Rect.Min.Y; y < ycbcr.Rect.Max.Y; y++ { - yy := (y - ycbcr.Rect.Min.Y) * ycbcr.YStride - cy := (y - ycbcr.Rect.Min.Y) * ycbcr.CStride - for x := ycbcr.Rect.Min.X; x < ycbcr.Rect.Max.X; x++ { - xx := (x - ycbcr.Rect.Min.X) - yi := yy + xx - ci := cy + xx/2 - ycbcr.Y[yi] = p.Pix[off+0] - ycbcr.Cb[ci] = p.Pix[off+1] - ycbcr.Cr[ci] = p.Pix[off+2] - off += 3 - } - } - case image.YCbCrSubsampleRatio420: - for y := ycbcr.Rect.Min.Y; y < ycbcr.Rect.Max.Y; y++ { - yy := (y - ycbcr.Rect.Min.Y) * ycbcr.YStride - cy := (y/2 - ycbcr.Rect.Min.Y/2) * ycbcr.CStride - for x := ycbcr.Rect.Min.X; x < ycbcr.Rect.Max.X; x++ { - xx := (x - ycbcr.Rect.Min.X) - yi := yy + xx - ci := cy + xx/2 - ycbcr.Y[yi] = p.Pix[off+0] - ycbcr.Cb[ci] = p.Pix[off+1] - ycbcr.Cr[ci] = p.Pix[off+2] - off += 3 - } - } - case image.YCbCrSubsampleRatio440: - for y := ycbcr.Rect.Min.Y; y < ycbcr.Rect.Max.Y; y++ { - yy := (y - ycbcr.Rect.Min.Y) * ycbcr.YStride - cy := (y/2 - ycbcr.Rect.Min.Y/2) * ycbcr.CStride - for x := ycbcr.Rect.Min.X; x < ycbcr.Rect.Max.X; x++ { - xx := (x - ycbcr.Rect.Min.X) - yi := yy + xx - ci := cy + xx - ycbcr.Y[yi] = 
p.Pix[off+0] - ycbcr.Cb[ci] = p.Pix[off+1] - ycbcr.Cr[ci] = p.Pix[off+2] - off += 3 - } - } - default: - // Default to 4:4:4 subsampling. - for y := ycbcr.Rect.Min.Y; y < ycbcr.Rect.Max.Y; y++ { - yy := (y - ycbcr.Rect.Min.Y) * ycbcr.YStride - cy := (y - ycbcr.Rect.Min.Y) * ycbcr.CStride - for x := ycbcr.Rect.Min.X; x < ycbcr.Rect.Max.X; x++ { - xx := (x - ycbcr.Rect.Min.X) - yi := yy + xx - ci := cy + xx - ycbcr.Y[yi] = p.Pix[off+0] - ycbcr.Cb[ci] = p.Pix[off+1] - ycbcr.Cr[ci] = p.Pix[off+2] - off += 3 - } - } - } - return ycbcr -} - -// imageYCbCrToYCC converts a YCbCr image to a ycc image for resizing. -func imageYCbCrToYCC(in *image.YCbCr) *ycc { - w, h := in.Rect.Dx(), in.Rect.Dy() - r := image.Rect(0, 0, w, h) - buf := make([]uint8, 3*w*h) - p := ycc{Pix: buf, Stride: 3 * w, Rect: r, SubsampleRatio: in.SubsampleRatio} - var off int - - switch in.SubsampleRatio { - case image.YCbCrSubsampleRatio422: - for y := in.Rect.Min.Y; y < in.Rect.Max.Y; y++ { - yy := (y - in.Rect.Min.Y) * in.YStride - cy := (y - in.Rect.Min.Y) * in.CStride - for x := in.Rect.Min.X; x < in.Rect.Max.X; x++ { - xx := (x - in.Rect.Min.X) - yi := yy + xx - ci := cy + xx/2 - p.Pix[off+0] = in.Y[yi] - p.Pix[off+1] = in.Cb[ci] - p.Pix[off+2] = in.Cr[ci] - off += 3 - } - } - case image.YCbCrSubsampleRatio420: - for y := in.Rect.Min.Y; y < in.Rect.Max.Y; y++ { - yy := (y - in.Rect.Min.Y) * in.YStride - cy := (y/2 - in.Rect.Min.Y/2) * in.CStride - for x := in.Rect.Min.X; x < in.Rect.Max.X; x++ { - xx := (x - in.Rect.Min.X) - yi := yy + xx - ci := cy + xx/2 - p.Pix[off+0] = in.Y[yi] - p.Pix[off+1] = in.Cb[ci] - p.Pix[off+2] = in.Cr[ci] - off += 3 - } - } - case image.YCbCrSubsampleRatio440: - for y := in.Rect.Min.Y; y < in.Rect.Max.Y; y++ { - yy := (y - in.Rect.Min.Y) * in.YStride - cy := (y/2 - in.Rect.Min.Y/2) * in.CStride - for x := in.Rect.Min.X; x < in.Rect.Max.X; x++ { - xx := (x - in.Rect.Min.X) - yi := yy + xx - ci := cy + xx - p.Pix[off+0] = in.Y[yi] - p.Pix[off+1] = in.Cb[ci] - 
p.Pix[off+2] = in.Cr[ci] - off += 3 - } - } - default: - // Default to 4:4:4 subsampling. - for y := in.Rect.Min.Y; y < in.Rect.Max.Y; y++ { - yy := (y - in.Rect.Min.Y) * in.YStride - cy := (y - in.Rect.Min.Y) * in.CStride - for x := in.Rect.Min.X; x < in.Rect.Max.X; x++ { - xx := (x - in.Rect.Min.X) - yi := yy + xx - ci := cy + xx - p.Pix[off+0] = in.Y[yi] - p.Pix[off+1] = in.Cb[ci] - p.Pix[off+2] = in.Cr[ci] - off += 3 - } - } - } - return &p -} diff --git a/vendor/vendor.json b/vendor/vendor.json index 19285f3c..b7831a26 100644 --- a/vendor/vendor.json +++ b/vendor/vendor.json @@ -58,6 +58,12 @@ "revision": "349dd0209470eabd9514242c688c403c0926d266", "revisionTime": "2016-12-24T14:14:13Z" }, + { + "checksumSHA1": "FOqUoijHGUZuS1T4pTcdlrNNlvc=", + "path": "github.com/bamiaux/rez", + "revision": "29f4463c688b986c11f166b12734f69b58b5555f", + "revisionTime": "2017-07-31T18:41:18Z" + }, { "checksumSHA1": "usT4LCSQItkFvFOQT7cBlkCuGaE=", "path": "github.com/beevik/etree", @@ -112,12 +118,6 @@ "revision": "95345c4e1c0ebc9d16a3284177f09360f4d20fab", "revisionTime": "2017-01-24T11:57:57Z" }, - { - "checksumSHA1": "r5eQHkttko6kxroDEENXbmXKrSs=", - "path": "github.com/nfnt/resize", - "revision": "891127d8d1b52734debe1b3c3d7e747502b6c366", - "revisionTime": "2016-07-24T20:39:20Z" - }, { "checksumSHA1": "LuFv4/jlrmFNnDb/5SCSEPAM9vU=", "path": "github.com/pmezard/go-difflib/difflib",