From 1260d4ff0f1af41aac95800cde1ab72ed2c413d4 Mon Sep 17 00:00:00 2001 From: Daniel Cox Date: Thu, 17 Jun 2021 23:22:04 -0400 Subject: [PATCH 01/11] wip vector arithmetic --- dataframe/dataframe.go | 263 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 235 insertions(+), 28 deletions(-) diff --git a/dataframe/dataframe.go b/dataframe/dataframe.go index cf1ae41..11d1c22 100644 --- a/dataframe/dataframe.go +++ b/dataframe/dataframe.go @@ -581,6 +581,34 @@ func (df DataFrame) Capply(f func(series.Series) series.Series) DataFrame { return New(columns...) } +func detectType(types []series.Type) series.Type { + var hasStrings, hasFloats, hasInts, hasBools bool + for _, t := range types { + switch t { + case series.String: + hasStrings = true + case series.Float: + hasFloats = true + case series.Int: + hasInts = true + case series.Bool: + hasBools = true + } + } + switch { + case hasStrings: + return series.String + case hasBools: + return series.Bool + case hasFloats: + return series.Float + case hasInts: + return series.Int + default: + panic("type not supported") + } +} + // Rapply applies the given function to the rows of a DataFrame. Prior to applying // the function the elements of each row are cast to a Series of a specific // type. In order of priority: String -> Float -> Int -> Bool. This casting also @@ -590,34 +618,6 @@ func (df DataFrame) Rapply(f func(series.Series) series.Series) DataFrame { return df } - detectType := func(types []series.Type) series.Type { - var hasStrings, hasFloats, hasInts, hasBools bool - for _, t := range types { - switch t { - case series.String: - hasStrings = true - case series.Float: - hasFloats = true - case series.Int: - hasInts = true - case series.Bool: - hasBools = true - } - } - switch { - case hasStrings: - return series.String - case hasBools: - return series.Bool - case hasFloats: - return series.Float - case hasInts: - return series.Int - default: - panic("type not supported") - } - } - // Detect row type prior to function application types := df.Types() rowType := detectType(types) @@ -1971,3 +1971,210 @@ func (df DataFrame) Describe() DataFrame { ddf := New(ss...) return ddf } + +// Binary vector operators + +// Applies `op` using elements from `lcolnm` and `rcolnm` as left and right operands, +// and stores the output in a new column `newcolnm`. +func (df DataFrame) Math(resultcolnm string, op interface{}, operandcols ...string) DataFrame { + if df.Err != nil { + return df + } + if len(operandcols) == 0 { + df.Err = fmt.Errorf("must supply at least one operand column name") + return df + } + cols := make([]series.Series, len(operandcols)) + types := make([]series.Type, len(operandcols)) + for i, colnm := range operandcols { + cols[i] = df.Col(colnm) + types[i] = cols[i].Type() + } + nrows := cols[0].Len() + ncols := len(cols) + + // confirm colTypes are all numeric + resultType := detectType(types) // float if there are any floats, int otherwise + if resultType == series.String || resultType == series.Bool { + df.Err = fmt.Errorf("cannot perform arithmetic with column of type %s", resultType) + return df + } + + switch resultType { + case series.Int: + results := make([]int, nrows) + for ridx := 0; ridx < nrows; ridx++ { + operands := make([]int, ncols) + for cidx, column := range cols { + operand, err := column.Elem(ridx).Int() + if err != nil { + df.Err = fmt.Errorf("unable to convert element %d of column %s to int: %w", ridx, operandcols[cidx], err) + return df + } + operands[ridx] = operand + } + results[ridx] = intOp(op, operands) + } + df = df.Mutate( + series.New(results, resultType, resultcolnm), + ) + case series.Float: + results := make([]float64, nrows) + for ridx := 0; ridx < nrows; ridx++ { + operands := make([]float64, ncols) + for _, column := range cols { + operand := column.Elem(ridx).Float() + operands[ridx] = operand + } + results[ridx] = floatOp(op, operands) + } + df = df.Mutate( + series.New(results, resultType, resultcolnm), + ) + default: + df.Err = fmt.Errorf("series type %s is not a type on which we can perform arithmetic", resultType) + } + + return df +} + +func floatOp(op interface{}, operands []float64) float64 { + var acc float64 // accumulator for n-ary operators + if len(operands) == 0 { + return 0 + } + + switch op := op.(type) { // takes care of support for things in `math` + case unaryFloatFunc: + return op(operands[0]) + case binaryFloatFunc: + return op(operands[0], operands[1]) + case trinaryFloatFunc: + return op(operands[0], operands[1], operands[2]) + + // for the most basic operations, support variadic operands + case string: + switch op { + case "+": + // add all operands + for _, operand := range operands { + acc += operand + } + case "-": + // with only one operand, return its negative. + // with more, subtract the rest from the first. + if len(operands) == 1 { + return -operands[0] + } + acc = operands[0] + for i := 1; i < len(operands); i++ { + acc = acc - operands[i] + } + case "*": + // the product of all operands + acc = 1 + for _, operand := range operands { + acc = acc * operand + } + case "/": + // With only one operand, reciprocal + // With more operands, divides by each denominator + // Divide by zero returns +Inf (as per usual with float64) + if len(operands) == 1 { + return 1 / operands[0] + } + acc = operands[0] + for i := 1; i < len(operands); i++ { + acc = acc / operands[i] + } + default: + panic(fmt.Sprintf("Unknown arithmetic operator: %s", op)) + } + } + + return acc +} + +// placeholders for infinity +// TODO prefer to handle errors with integer ops +const MaxUint = ^uint(0) +const MaxInt = int(MaxUint >> 1) + +func intOp(op interface{}, operands []int) int { + var acc int // accumulator for n-ary operators + if len(operands) == 0 { + return 0 + } + + switch op := op.(type) { // users can specify functions for `op`, or a string + case unaryIntFunc: + return op(operands[0]) + case binaryIntFunc: + return op(operands[0], operands[1]) + case trinaryIntFunc: + return op(operands[0], operands[1], operands[2]) + case string: + switch op { + case "+": + // add all operands + for _, operand := range operands { + acc += operand + } + case "-": + // with only one operand, return its negative. + // with more, subtract the rest from the first. + if len(operands) == 1 { + return -operands[0] + } + acc = operands[0] + for i := 1; i < len(operands); i++ { + acc = acc - operands[i] + } + case "*": + // the product of all operands + acc = 1 + for _, operand := range operands { + acc = acc * operand + } + case "/": + // With only one operand, int reciprocal (0 or 1 or "infinity") + // With more, divides by each denominator + // Divide by zero returns `MaxInt` (poor-man's infinity) + if len(operands) == 1 { // reciprocal case + if operands[0] == 0 { // reciprocal of zero + return MaxInt // poor man's infinity + } + return 1 / operands[0] // 0 or 1 for int division + } + // normal division case + acc = operands[0] + for i := 1; i < len(operands); i++ { + if operands[i] == 0 { + return MaxInt // poor man's infinity + } + acc = acc / operands[i] + } + case "%": + // remainder after division of first two operands only + if len(operands) < 2 { // one argument, just return it + return operands[0] + } + if operands[1] == 0 { // integer division by zero - just return a big number + return MaxInt // poor man's infinity + } + return operands[0] % operands[1] + default: + panic(fmt.Sprintf("Unknown arithmetic operator: %s", op)) + } + } + + return acc +} + +type unaryFloatFunc func(float64) float64 +type binaryFloatFunc func(float64, float64) float64 +type trinaryFloatFunc func(float64, float64, float64) float64 + +type unaryIntFunc func(int) int +type binaryIntFunc func(int, int) int +type trinaryIntFunc func(int, int, int) int From c9d53dc2954adce9eb3a0148a2754ce0ba08d304 Mon Sep 17 00:00:00 2001 From: Daniel Cox Date: Tue, 22 Jun 2021 11:54:29 -0400 Subject: [PATCH 02/11] fixed Math --- dataframe/dataframe.go | 200 ++++++++++++++++++++--------------------- 1 file changed, 96 insertions(+), 104 deletions(-) diff --git a/dataframe/dataframe.go b/dataframe/dataframe.go index 11d1c22..1405d1e 100644 --- a/dataframe/dataframe.go +++ b/dataframe/dataframe.go @@ -98,7 +98,7 @@ func (df DataFrame) Copy() DataFrame { // String implements the Stringer interface for DataFrame func (df DataFrame) String() (str string) { - return df.print(true, true, true, true, 10, 70, "DataFrame") + return df.print(true, false, true, true, 10, 70, "DataFrame") } func (df DataFrame) print( @@ -2011,7 +2011,7 @@ func (df DataFrame) Math(resultcolnm string, op interface{}, operandcols ...stri df.Err = fmt.Errorf("unable to convert element %d of column %s to int: %w", ridx, operandcols[cidx], err) return df } - operands[ridx] = operand + operands[cidx] = operand } results[ridx] = intOp(op, operands) } @@ -2022,9 +2022,9 @@ func (df DataFrame) Math(resultcolnm string, op interface{}, operandcols ...stri results := make([]float64, nrows) for ridx := 0; ridx < nrows; ridx++ { operands := make([]float64, ncols) - for _, column := range cols { + for cidx, column := range cols { operand := column.Elem(ridx).Float() - operands[ridx] = operand + operands[cidx] = operand } results[ridx] = floatOp(op, operands) } @@ -2039,57 +2039,57 @@ func (df DataFrame) Math(resultcolnm string, op interface{}, operandcols ...stri } func floatOp(op interface{}, operands []float64) float64 { - var acc float64 // accumulator for n-ary operators + fmt.Println("In floatOp") // DEBUG + var acc float64 // accumulator for n-ary operators if len(operands) == 0 { return 0 } switch op := op.(type) { // takes care of support for things in `math` - case unaryFloatFunc: + case func(float64) float64: return op(operands[0]) - case binaryFloatFunc: + case func(float64, float64) float64: return op(operands[0], operands[1]) - case trinaryFloatFunc: + case func(float64, float64, float64) float64: return op(operands[0], operands[1], operands[2]) + } // for the most basic operations, support variadic operands - case string: - switch op { - case "+": - // add all operands - for _, operand := range operands { - acc += operand - } - case "-": - // with only one operand, return its negative. - // with more, subtract the rest from the first. - if len(operands) == 1 { - return -operands[0] - } - acc = operands[0] - for i := 1; i < len(operands); i++ { - acc = acc - operands[i] - } - case "*": - // the product of all operands - acc = 1 - for _, operand := range operands { - acc = acc * operand - } - case "/": - // With only one operand, reciprocal - // With more operands, divides by each denominator - // Divide by zero returns +Inf (as per usual with float64) - if len(operands) == 1 { - return 1 / operands[0] - } - acc = operands[0] - for i := 1; i < len(operands); i++ { - acc = acc / operands[i] - } - default: - panic(fmt.Sprintf("Unknown arithmetic operator: %s", op)) + switch op { + case "+": + // add all operands + for _, operand := range operands { + acc += operand + } + case "-": + // with only one operand, return its negative. + // with more, subtract the rest from the first. + if len(operands) == 1 { + return -operands[0] + } + acc = operands[0] + for i := 1; i < len(operands); i++ { + acc = acc - operands[i] + } + case "*": + // the product of all operands + acc = 1 + for _, operand := range operands { + acc = acc * operand + } + case "/": + // With only one operand, reciprocal + // With more operands, divides by each denominator + // Divide by zero returns +Inf (as per usual with float64) + if len(operands) == 1 { + return 1 / operands[0] + } + acc = operands[0] + for i := 1; i < len(operands); i++ { + acc = acc / operands[i] } + default: + panic(fmt.Sprintf("Unknown arithmetic operator: %s", op)) } return acc @@ -2107,74 +2107,66 @@ func intOp(op interface{}, operands []int) int { } switch op := op.(type) { // users can specify functions for `op`, or a string - case unaryIntFunc: + case func(int) int: return op(operands[0]) - case binaryIntFunc: + case func(int, int) int: return op(operands[0], operands[1]) - case trinaryIntFunc: + case func(int, int, int) int: return op(operands[0], operands[1], operands[2]) - case string: - switch op { - case "+": - // add all operands - for _, operand := range operands { - acc += operand - } - case "-": - // with only one operand, return its negative. - // with more, subtract the rest from the first. - if len(operands) == 1 { - return -operands[0] - } - acc = operands[0] - for i := 1; i < len(operands); i++ { - acc = acc - operands[i] - } - case "*": - // the product of all operands - acc = 1 - for _, operand := range operands { - acc = acc * operand - } - case "/": - // With only one operand, int reciprocal (0 or 1 or "infinity") - // With more, divides by each denominator - // Divide by zero returns `MaxInt` (poor-man's infinity) - if len(operands) == 1 { // reciprocal case - if operands[0] == 0 { // reciprocal of zero - return MaxInt // poor man's infinity - } - return 1 / operands[0] // 0 or 1 for int division - } - // normal division case - acc = operands[0] - for i := 1; i < len(operands); i++ { - if operands[i] == 0 { - return MaxInt // poor man's infinity - } - acc = acc / operands[i] - } - case "%": - // remainder after division of first two operands only - if len(operands) < 2 { // one argument, just return it - return operands[0] + } + + switch op { + case "+": + // add all operands + for _, operand := range operands { + acc += operand + } + case "-": + // with only one operand, return its negative. + // with more, subtract the rest from the first. + if len(operands) == 1 { + return -operands[0] + } + acc = operands[0] + for i := 1; i < len(operands); i++ { + acc = acc - operands[i] + } + case "*": + // the product of all operands + acc = 1 + for _, operand := range operands { + acc = acc * operand + } + case "/": + // With only one operand, int reciprocal (0 or 1 or "infinity") + // With more, divides by each denominator + // Divide by zero returns `MaxInt` (poor-man's infinity) + if len(operands) == 1 { // reciprocal case + if operands[0] == 0 { // reciprocal of zero + return MaxInt // poor man's infinity } - if operands[1] == 0 { // integer division by zero - just return a big number + return 1 / operands[0] // 0 or 1 for int division + } + // normal division case + acc = operands[0] + for i := 1; i < len(operands); i++ { + if operands[i] == 0 { return MaxInt // poor man's infinity } - return operands[0] % operands[1] - default: - panic(fmt.Sprintf("Unknown arithmetic operator: %s", op)) + acc = acc / operands[i] + } + case "%": + // remainder after division of first two operands only + if len(operands) < 2 { // one argument, just return it + return operands[0] + } + if operands[1] == 0 { // integer division by zero - just return a big number + return MaxInt // poor man's infinity } + return operands[0] % operands[1] + default: + panic(fmt.Sprintf("Unknown arithmetic operator: %s", op)) } return acc } - -type unaryFloatFunc func(float64) float64 -type binaryFloatFunc func(float64, float64) float64 -type trinaryFloatFunc func(float64, float64, float64) float64 - -type unaryIntFunc func(int) int -type binaryIntFunc func(int, int) int -type trinaryIntFunc func(int, int, int) int From 2ce8e1bea4a35c2035f564becaf154e6e49903b8 Mon Sep 17 00:00:00 2001 From: Daniel Cox Date: Tue, 22 Jun 2021 15:09:51 -0400 Subject: [PATCH 03/11] use go modules --- dataframe/math_test.go | 21 ++++++++++++++ go.mod | 7 +++++ go.sum | 66 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 94 insertions(+) create mode 100644 dataframe/math_test.go create mode 100644 go.mod create mode 100644 go.sum diff --git a/dataframe/math_test.go b/dataframe/math_test.go new file mode 100644 index 0000000..4c158da --- /dev/null +++ b/dataframe/math_test.go @@ -0,0 +1,21 @@ +package dataframe + +import ( + "fmt" + "testing" + + "github.com/go-gota/gota/series" +) + +var df DataFrame = New( + series.New([]string{"e", "Pi", "Phi", "Sqrt2", "Ln2"}, series.String, "Strings"), + series.New([]float64{2.718, 3.142, 1.618, 1.414, 0.693}, series.Float, "Floats"), + series.New([]int{1, 3, 5, 7, 11}, series.Int, "Ints"), +) + +func TestFloatOps(t *testing.T) { + fmt.Println(df) + t.Fail() +} + +// Test cast to float diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..645bdfc --- /dev/null +++ b/go.mod @@ -0,0 +1,7 @@ +module github.com/go-gota/gota + +go 1.16 + +require gonum.org/v1/gonum v0.9.1 + +replace github.com/go-gota/gota => /Users/danielpcox/projects/decipher/gota diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..fe0441e --- /dev/null +++ b/go.sum @@ -0,0 +1,66 @@ +dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU= +gioui.org v0.0.0-20210308172011-57750fc8a0a6/go.mod h1:RSH6KIUZ0p2xy5zHDxgAM4zumjgTw83q2ge/PI+yyw8= +github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= +github.com/ajstarks/svgo v0.0.0-20180226025133-644b8db467af/go.mod h1:K08gAheRH3/J6wwsYMMT4xOr94bZjxIelGM0+d/wbFw= +github.com/boombuler/barcode v1.0.0/go.mod h1:paBWMcWSl3LHKBqUq+rly7CNSldXjb2rDl3JlRe0mD8= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/fogleman/gg v1.2.1-0.20190220221249-0403632d5b90/go.mod h1:R/bRT+9gY/C5z7JzPU0zXsXHKM4/ayA+zqcVNZzPa1k= +github.com/fogleman/gg v1.3.0/go.mod h1:R/bRT+9gY/C5z7JzPU0zXsXHKM4/ayA+zqcVNZzPa1k= +github.com/go-fonts/dejavu v0.1.0/go.mod h1:4Wt4I4OU2Nq9asgDCteaAaWZOV24E+0/Pwo0gppep4g= +github.com/go-fonts/latin-modern v0.2.0/go.mod h1:rQVLdDMK+mK1xscDwsqM5J8U2jrRa3T0ecnM9pNujks= +github.com/go-fonts/liberation v0.1.1/go.mod h1:K6qoJYypsmfVjWg8KOVDQhLc8UDgIK2HYqyqAO9z7GY= +github.com/go-fonts/stix v0.1.0/go.mod h1:w/c1f0ldAUlJmLBvlbkvVXLAD+tAMqobIIQpmnUIzUY= +github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU= +github.com/go-latex/latex v0.0.0-20210118124228-b3d85cf34e07/go.mod h1:CO1AlKB2CSIqUrmQPqA0gdRIlnLEY0gK5JGjh37zN5U= +github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod h1:E/TSTwGwJL78qG/PmXZO1EjYhfJinVAhrmmHX6Z8B9k= +github.com/jung-kurt/gofpdf v1.0.0/go.mod h1:7Id9E/uU8ce6rXgefFLlgrJj/GYY22cpxn+r32jIOes= +github.com/jung-kurt/gofpdf v1.0.3-0.20190309125859-24315acbbda5/go.mod h1:7Id9E/uU8ce6rXgefFLlgrJj/GYY22cpxn+r32jIOes= +github.com/phpdave11/gofpdf v1.4.2/go.mod h1:zpO6xFn9yxo3YLyMvW8HcKWVdbNqgIfOOp2dXMnm1mY= +github.com/phpdave11/gofpdi v1.0.12/go.mod h1:vBmVV0Do6hSBHC8uKUQ71JGW+ZGQq74llk/7bXwjDoI= +github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/ruudk/golang-pdf417 v0.0.0-20181029194003-1af4ab5afa58/go.mod h1:6lfFZQK844Gfx8o5WFuvpxWRwnSoipWe/p622j1v06w= +github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/exp v0.0.0-20180321215751-8460e604b9de/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= +golang.org/x/exp v0.0.0-20180807140117-3d87b88a115f/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= +golang.org/x/exp v0.0.0-20190125153040-c74c464bbbf2/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= +golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= +golang.org/x/exp v0.0.0-20191002040644-a1355ae1e2c3 h1:n9HxLrNxWWtEb1cA950nuEEj3QnKbtsCJ6KjcgisNUs= +golang.org/x/exp v0.0.0-20191002040644-a1355ae1e2c3/go.mod h1:NOZ3BPKG0ec/BKJQgnvsSFpcKLM5xXVWnvZS97DWHgE= +golang.org/x/image v0.0.0-20180708004352-c73c2afc3b81/go.mod h1:ux5Hcp/YLpHSI86hEcLt0YII63i6oz57MZXIpbrjZUs= +golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js= +golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= +golang.org/x/image v0.0.0-20190910094157-69e4b8554b2a/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= +golang.org/x/image v0.0.0-20200119044424-58c23975cae1/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= +golang.org/x/image v0.0.0-20200430140353-33d19683fad8/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= +golang.org/x/image v0.0.0-20200618115811-c13761719519/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= +golang.org/x/image v0.0.0-20201208152932-35266b937fa6/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= +golang.org/x/image v0.0.0-20210216034530-4410531fe030/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= +golang.org/x/mobile v0.0.0-20190719004257-d2bd2a29d028/go.mod h1:E/iHnbuqvinMTCcRqshq8CkpyQDoeVncDDYHnLhea+o= +golang.org/x/mod v0.1.0/go.mod h1:0QHyrYULN0/3qlju5TqG8bIK38QM8yzMo5ekMj3DlcY= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210304124612-50617c2ba197/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/tools v0.0.0-20180525024113-a5b4c53f6e8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20190206041539-40960b6deb8e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20190927191325-030b2cf1153e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +gonum.org/v1/gonum v0.0.0-20180816165407-929014505bf4/go.mod h1:Y+Yx5eoAFn32cQvJDxZx5Dpnq+c3wtXuadVZAcxbbBo= +gonum.org/v1/gonum v0.8.2/go.mod h1:oe/vMfY3deqTw+1EZJhuvEW2iwGF1bW9wwu7XCu0+v0= +gonum.org/v1/gonum v0.9.1 h1:HCWmqqNoELL0RAQeKBXWtkp04mGk8koafcB4He6+uhc= +gonum.org/v1/gonum v0.9.1/go.mod h1:TZumC3NeyVQskjXqmyWt4S3bINhy7B4eYwW69EbyX+0= +gonum.org/v1/netlib v0.0.0-20190313105609-8cb42192e0e0 h1:OE9mWmgKkjJyEmDAAtGMPjXu+YNeGvK9VTSHY6+Qihc= +gonum.org/v1/netlib v0.0.0-20190313105609-8cb42192e0e0/go.mod h1:wa6Ws7BG/ESfp6dHfk7C6KdzKA7wR7u/rKwOGE66zvw= +gonum.org/v1/plot v0.0.0-20190515093506-e2840ee46a6b/go.mod h1:Wt8AAjI+ypCyYX3nZBvf6cAIx93T+c/OS2HFAYskSZc= +gonum.org/v1/plot v0.9.0/go.mod h1:3Pcqqmp6RHvJI72kgb8fThyUnav364FOsdDo2aGW5lY= +rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4= From b8854745808861488a217c631e48510271115dc0 Mon Sep 17 00:00:00 2001 From: Daniel Cox Date: Tue, 22 Jun 2021 18:17:59 -0400 Subject: [PATCH 04/11] added tests for Math method --- dataframe/dataframe.go | 5 +- dataframe/math_test.go | 278 +++++++++++++++++++++++++++++++++++++++-- 2 files changed, 271 insertions(+), 12 deletions(-) diff --git a/dataframe/dataframe.go b/dataframe/dataframe.go index 1405d1e..ddbf81a 100644 --- a/dataframe/dataframe.go +++ b/dataframe/dataframe.go @@ -1972,7 +1972,7 @@ func (df DataFrame) Describe() DataFrame { return ddf } -// Binary vector operators +// Vector operators on arbitrary numeric columns // Applies `op` using elements from `lcolnm` and `rcolnm` as left and right operands, // and stores the output in a new column `newcolnm`. @@ -2039,8 +2039,7 @@ func (df DataFrame) Math(resultcolnm string, op interface{}, operandcols ...stri } func floatOp(op interface{}, operands []float64) float64 { - fmt.Println("In floatOp") // DEBUG - var acc float64 // accumulator for n-ary operators + var acc float64 // accumulator for n-ary operators if len(operands) == 0 { return 0 } diff --git a/dataframe/math_test.go b/dataframe/math_test.go index 4c158da..a6a2fa3 100644 --- a/dataframe/math_test.go +++ b/dataframe/math_test.go @@ -2,20 +2,280 @@ package dataframe import ( "fmt" + "math" + "reflect" "testing" "github.com/go-gota/gota/series" ) -var df DataFrame = New( - series.New([]string{"e", "Pi", "Phi", "Sqrt2", "Ln2"}, series.String, "Strings"), - series.New([]float64{2.718, 3.142, 1.618, 1.414, 0.693}, series.Float, "Floats"), - series.New([]int{1, 3, 5, 7, 11}, series.Int, "Ints"), -) +func TestMath(t *testing.T) { + /* Input is a 5x4 DataFrame + + Strings Floats Primes Naturals + 0: e 2.718000 1 1 + 1: Pi 3.142000 3 2 + 2: Phi 1.618000 5 3 + 3: Sqrt2 1.414000 7 4 + 4: Ln2 0.693000 11 5 + + */ + input := New( + series.New([]string{"e", "Pi", "Phi", "Sqrt2", "Ln2"}, series.String, "Strings"), + series.New([]float64{2.718, 3.142, 1.618, 1.414, 0.693}, series.Float, "Floats"), + series.New([]int{1, 3, 5, 7, 11}, series.Int, "Primes"), + series.New([]int{1, 2, 3, 4, 5}, series.Int, "Naturals"), + ) + + table := testTable{ + // Sums + { + fut: func(df DataFrame) DataFrame { + df = df.Math("Sum", "+", "Floats", "Primes") + return df + }, + selection: []string{"Sum"}, + expected: New( + series.New([]float64{3.718, 6.142, 6.618, 8.414, 11.693}, series.Float, "Sum"), + ), + }, + { + fut: func(df DataFrame) DataFrame { + df = df.Math("IntSum", "+", "Primes", "Naturals") + return df + }, + selection: []string{"IntSum"}, + expected: New( + series.New([]int{2, 5, 8, 11, 16}, series.Int, "IntSum"), + ), + }, + + // Differences + { + fut: func(df DataFrame) DataFrame { + df = df.Math("Difference", "-", "Floats", "Primes") + return df + }, + selection: []string{"Difference"}, + expected: New( + series.New([]float64{1.718000, 0.142000, -3.382000, -5.586000, -10.307000}, series.Float, "Difference"), + ), + }, + { + fut: func(df DataFrame) DataFrame { + df = df.Math("IntDifference", "-", "Primes", "Naturals") + return df + }, + selection: []string{"IntDifference"}, + expected: New( + series.New([]int{0, 1, 2, 3, 6}, series.Int, "IntDifference"), + ), + }, + + // Products + { + fut: func(df DataFrame) DataFrame { + df = df.Math("Product", "*", "Floats", "Primes") + return df + }, + selection: []string{"Product"}, + expected: New( + series.New([]float64{2.718000, 9.426000, 8.090000, 9.898000, 7.623000}, series.Float, "Product"), + ), + }, + { + fut: func(df DataFrame) DataFrame { + df = df.Math("IntProduct", "*", "Primes", "Naturals") + return df + }, + selection: []string{"IntProduct"}, + expected: New( + series.New([]int{1, 6, 15, 28, 55}, series.Int, "IntProduct"), + ), + }, + + // Quotients + { + fut: func(df DataFrame) DataFrame { + df = df.Math("Quotient", "/", "Floats", "Primes") + return df + }, + selection: []string{"Quotient"}, + expected: New( + series.New([]float64{2.718000, 1.047333, 0.323600, 0.202000, 0.063000}, series.Float, "Quotient"), + ), + }, + { + fut: func(df DataFrame) DataFrame { + df = df.Math("IntQuotient", "/", "Primes", "Naturals") + return df + }, + selection: []string{"IntQuotient"}, + expected: New( + series.New([]int{1, 1, 1, 1, 2}, series.Int, "IntQuotient"), + ), + }, + { + fut: func(df DataFrame) DataFrame { + df = df.Math("Modulo", "%", "Primes", "Naturals") + return df + }, + selection: []string{"Modulo"}, + expected: New( + series.New([]int{0, 1, 2, 3, 1}, series.Int, "Modulo"), + ), + }, + { + fut: func(df DataFrame) DataFrame { + df = df.Math("ModuloSelf", "%", "Primes", "Primes") + return df + }, + selection: []string{"ModuloSelf"}, + expected: New( + series.New([]int{0, 0, 0, 0, 0}, series.Int, "ModuloSelf"), + ), + }, + + // >2 operands + { + fut: func(df DataFrame) DataFrame { + df = df.Math("MultiSum", "+", "Floats", "Floats", "Primes", "Primes") + return df + }, + selection: []string{"MultiSum"}, + expected: New( + series.New([]float64{7.436000, 12.284000, 13.236000, 16.828000, 23.386000}, series.Float, "MultiSum"), + ), + }, + { + fut: func(df DataFrame) DataFrame { + df = df.Math("MultiDifference", "-", "Floats", "Floats", "Primes", "Primes") + return df + }, + selection: []string{"MultiDifference"}, + expected: New( + series.New([]float64{-2.000000, -6.000000, -10.000000, -14.000000, -22.000000}, series.Float, "MultiDifference"), + ), + }, + { + fut: func(df DataFrame) DataFrame { + df = df.Math("MultiProduct", "*", "Floats", "Floats", "Primes", "Primes") + return df + }, + selection: []string{"MultiProduct"}, + expected: New( + series.New([]float64{7.387524, 88.849476, 65.448100, 97.970404, 58.110129}, series.Float, "MultiProduct"), + ), + }, + { + fut: func(df DataFrame) DataFrame { + df = df.Math("MultiQuotient", "/", "Floats", "Floats", "Primes", "Primes") + return df + }, + selection: []string{"MultiQuotient"}, + expected: New( + series.New([]float64{1.000000, 0.111111, 0.040000, 0.020408, 0.008264}, series.Float, "MultiQuotient"), + ), + }, + + // Arbitrary float functions + { + fut: func(df DataFrame) DataFrame { + df = df.Math("UnaryFloatFunc", math.Cos, "Floats") + return df + }, + selection: []string{"UnaryFloatFunc"}, + expected: New( + series.New([]float64{-0.911618, -1.000000, -0.047186, 0.156155, 0.769333}, series.Float, "UnaryFloatFunc"), + ), + }, + { + fut: func(df DataFrame) DataFrame { + df = df.Math("BinaryFloatFunc", math.Hypot, "Floats", "Floats") + return df + }, + selection: []string{"BinaryFloatFunc"}, + expected: New( + series.New([]float64{3.843832, 4.443459, 2.288198, 1.999698, 0.980050}, series.Float, "BinaryFloatFunc"), + ), + }, + { + fut: func(df DataFrame) DataFrame { + df = df.Math("TrinaryFloatFunc", math.FMA, "Floats", "Floats", "Floats") + return df + }, + selection: []string{"TrinaryFloatFunc"}, + expected: New( + series.New([]float64{10.105524, 13.014164, 4.235924, 3.413396, 1.173249}, series.Float, "TrinaryFloatFunc"), + ), + }, + + // Arbitrary int functions + { + fut: func(df DataFrame) DataFrame { + df = df.Math("UnaryIntFunc", func(i int) int { return i*2 + 1 }, "Primes") + return df + }, + selection: []string{"UnaryIntFunc"}, + expected: New( + series.New([]int{3, 7, 11, 15, 23}, series.Int, "UnaryIntFunc"), + ), + }, + { + fut: func(df DataFrame) DataFrame { + df = df.Math("BinaryIntFunc", func(x, y int) int { return x * y }, "Naturals", "Primes") + return df + }, + selection: []string{"BinaryIntFunc"}, + expected: New( + series.New([]int{1, 6, 15, 28, 55}, series.Int, "BinaryIntFunc"), + ), + }, + { + fut: func(df DataFrame) DataFrame { + df = df.Math( + "TrinaryIntFunc", + func(x, y, z int) int { return x * y * z }, + "Naturals", "Naturals", "Primes") + return df + }, + selection: []string{"TrinaryIntFunc"}, + expected: New( + series.New([]int{1, 12, 45, 112, 275}, series.Int, "TrinaryIntFunc"), + ), + }, + } -func TestFloatOps(t *testing.T) { - fmt.Println(df) - t.Fail() + runTestTable(table, input, t) + + fmt.Println(input) // DEBUG does it have all columns? + +} + +// Test helpers + +type testTable []struct { + fut func(DataFrame) DataFrame + selection interface{} + expected DataFrame } -// Test cast to float +func runTestTable(table testTable, input DataFrame, t *testing.T) { + + for tidx, test := range table { + observed := test.fut(input).Select(test.selection) + + // Check that the types are the same between both DataFrames + if !reflect.DeepEqual(test.expected.Types(), observed.Types()) { + t.Errorf("Test: %d\nDifferent types:\nA:%v\nB:%v", tidx, test.expected.Types(), observed.Types()) + } + // Check that the colnames are the same between both DataFrames + if !reflect.DeepEqual(test.expected.Names(), observed.Names()) { + t.Errorf("Test: %d\nDifferent colnames:\nA:%v\nB:%v", tidx, test.expected.Names(), observed.Names()) + } + // Check that the values are the same between both DataFrames + if !reflect.DeepEqual(test.expected.Records(), observed.Records()) { + t.Fatalf("Test: %d\nDifferent values:\nExpected:%v\nObserved:%v", tidx, test.expected.Records(), observed.Records()) + } + } +} From 03d5231a52dc4ac2323e9d587ed05008a66651a2 Mon Sep 17 00:00:00 2001 From: Daniel Cox Date: Wed, 23 Jun 2021 14:36:12 -0400 Subject: [PATCH 05/11] error tests and coerce ints with float func op --- dataframe/dataframe.go | 52 +++++++++++++++++++++++++++--------------- dataframe/math_test.go | 44 +++++++++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+), 19 deletions(-) diff --git a/dataframe/dataframe.go b/dataframe/dataframe.go index ddbf81a..6f43a88 100644 --- a/dataframe/dataframe.go +++ b/dataframe/dataframe.go @@ -1993,8 +1993,16 @@ func (df DataFrame) Math(resultcolnm string, op interface{}, operandcols ...stri nrows := cols[0].Len() ncols := len(cols) + // detect result column type (as well as pre-op coercion target) + // If `op` is a float func, need to coerce ints to floats + var resultType series.Type + switch op.(type) { + case func(float64) float64, func(float64, float64) float64, func(float64, float64, float64) float64: + resultType = series.Float + default: + resultType = detectType(types) // float if there are any floats, int otherwise + } // confirm colTypes are all numeric - resultType := detectType(types) // float if there are any floats, int otherwise if resultType == series.String || resultType == series.Bool { df.Err = fmt.Errorf("cannot perform arithmetic with column of type %s", resultType) return df @@ -2008,12 +2016,18 @@ func (df DataFrame) Math(resultcolnm string, op interface{}, operandcols ...stri for cidx, column := range cols { operand, err := column.Elem(ridx).Int() if err != nil { + // it's possible this error just can't happen anymore at this point df.Err = fmt.Errorf("unable to convert element %d of column %s to int: %w", ridx, operandcols[cidx], err) return df } operands[cidx] = operand } - results[ridx] = intOp(op, operands) + result, err := intOp(op, operands) + if err != nil { + df.Err = fmt.Errorf("error while performing integer op: %w", err) + return df + } + results[ridx] = result } df = df.Mutate( series.New(results, resultType, resultcolnm), @@ -2099,19 +2113,19 @@ func floatOp(op interface{}, operands []float64) float64 { const MaxUint = ^uint(0) const MaxInt = int(MaxUint >> 1) -func intOp(op interface{}, operands []int) int { +func intOp(op interface{}, operands []int) (int, error) { var acc int // accumulator for n-ary operators if len(operands) == 0 { - return 0 + return 0, nil } switch op := op.(type) { // users can specify functions for `op`, or a string case func(int) int: - return op(operands[0]) + return op(operands[0]), nil case func(int, int) int: - return op(operands[0], operands[1]) + return op(operands[0], operands[1]), nil case func(int, int, int) int: - return op(operands[0], operands[1], operands[2]) + return op(operands[0], operands[1], operands[2]), nil } switch op { @@ -2124,7 +2138,7 @@ func intOp(op interface{}, operands []int) int { // with only one operand, return its negative. // with more, subtract the rest from the first. if len(operands) == 1 { - return -operands[0] + return -operands[0], nil } acc = operands[0] for i := 1; i < len(operands); i++ { @@ -2137,35 +2151,35 @@ func intOp(op interface{}, operands []int) int { acc = acc * operand } case "/": - // With only one operand, int reciprocal (0 or 1 or "infinity") + // With only one operand, int reciprocal (0 or 1) // With more, divides by each denominator - // Divide by zero returns `MaxInt` (poor-man's infinity) + // Divide by zero errors if len(operands) == 1 { // reciprocal case if operands[0] == 0 { // reciprocal of zero - return MaxInt // poor man's infinity + return 0, fmt.Errorf("integer divide by zero") } - return 1 / operands[0] // 0 or 1 for int division + return 1 / operands[0], nil // 0 or 1 for int division } // normal division case acc = operands[0] for i := 1; i < len(operands); i++ { if operands[i] == 0 { - return MaxInt // poor man's infinity + return 0, fmt.Errorf("integer divide by zero") } acc = acc / operands[i] } case "%": // remainder after division of first two operands only if len(operands) < 2 { // one argument, just return it - return operands[0] + return operands[0], nil } - if operands[1] == 0 { // integer division by zero - just return a big number - return MaxInt // poor man's infinity + if operands[1] == 0 { + return 0, fmt.Errorf("integer divide by zero") } - return operands[0] % operands[1] + return operands[0] % operands[1], nil default: - panic(fmt.Sprintf("Unknown arithmetic operator: %s", op)) + return 0, fmt.Errorf("unknown arithmetic operator: %s", op) } - return acc + return acc, nil } diff --git a/dataframe/math_test.go b/dataframe/math_test.go index a6a2fa3..febaa61 100644 --- a/dataframe/math_test.go +++ b/dataframe/math_test.go @@ -4,6 +4,7 @@ import ( "fmt" "math" "reflect" + "strings" "testing" "github.com/go-gota/gota/series" @@ -252,6 +253,36 @@ func TestMath(t *testing.T) { } +func TestErrors(t *testing.T) { + expectError("at least one operand", func(df DataFrame) DataFrame { + return df.Math("Empty operands", "+") + }, t) + + expectError("cannot perform arithmetic with column of type string", func(df DataFrame) DataFrame { + return df.Math("Non-numeric type", "+", "Strings") + }, t) + + expectError("unknown arithmetic operator", func(df DataFrame) DataFrame { + return df.Math("unknown operator", "!", "Primes") + }, t) + + expectError("integer divide by zero", func(df DataFrame) DataFrame { + return df.Math("Divide by zero", "/", "Primes", "Naturals0") + }, t) + + // reciprocal + expectError("integer divide by zero", func(df DataFrame) DataFrame { + return df.Math("Divide by zero", "/", "Naturals0") + }, t) + + // modulo 0 + expectError("integer divide by zero", func(df DataFrame) DataFrame { + return df.Math("Divide by zero", "%", "Primes", "Naturals0") + }, t) + + // catch panic on unknown op +} + // Test helpers type testTable []struct { @@ -279,3 +310,16 @@ func runTestTable(table testTable, input DataFrame, t *testing.T) { } } } + +func expectError(message string, fut func(DataFrame) DataFrame, t *testing.T) { + df := New( + series.New([]string{"e", "Pi", "Phi", "Sqrt2", "Ln2"}, series.String, "Strings"), + series.New([]float64{2.718, 3.142, 1.618, 1.414, 0.693}, series.Float, "Floats"), + series.New([]int{1, 3, 5, 7, 11}, series.Int, "Primes"), + series.New([]int{0, 1, 2, 3, 4}, series.Int, "Naturals0"), + ) + df = fut(df) + if !strings.Contains(df.Err.Error(), message) { + t.Fatalf("expected error to contain '%s', but got %v", message, df.Err) + } +} From 7527397b3e861773008566bf63edafc177ab1f34 Mon Sep 17 00:00:00 2001 From: Daniel Cox Date: Wed, 23 Jun 2021 14:50:31 -0400 Subject: [PATCH 06/11] update replace in go.mod to repo --- dataframe/math_test.go | 3 --- go.mod | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/dataframe/math_test.go b/dataframe/math_test.go index febaa61..4b84436 100644 --- a/dataframe/math_test.go +++ b/dataframe/math_test.go @@ -1,7 +1,6 @@ package dataframe import ( - "fmt" "math" "reflect" "strings" @@ -249,8 +248,6 @@ func TestMath(t *testing.T) { runTestTable(table, input, t) - fmt.Println(input) // DEBUG does it have all columns? - } func TestErrors(t *testing.T) { diff --git a/go.mod b/go.mod index 645bdfc..4a74e1f 100644 --- a/go.mod +++ b/go.mod @@ -4,4 +4,4 @@ go 1.16 require gonum.org/v1/gonum v0.9.1 -replace github.com/go-gota/gota => /Users/danielpcox/projects/decipher/gota +replace github.com/go-gota/gota => github.com/greymatter-io/gota v0.10.2-0.20210623183612-03d5231a52dc From aa97592c8ba5918a52a3708f8ae0a67e04683b9f Mon Sep 17 00:00:00 2001 From: Daniel Cox Date: Thu, 24 Jun 2021 16:44:37 -0400 Subject: [PATCH 07/11] adding FindElem to easily select a single value by labels --- dataframe/dataframe.go | 36 +++++++++++++++++++++++++++++++----- 1 file changed, 31 insertions(+), 5 deletions(-) diff --git a/dataframe/dataframe.go b/dataframe/dataframe.go index 6f43a88..0b157f1 100644 --- a/dataframe/dataframe.go +++ b/dataframe/dataframe.go @@ -1972,6 +1972,37 @@ func (df DataFrame) Describe() DataFrame { return ddf } +// TODO tests +// Finds a specific element like `Elem`, but using a column and value to get the row, +// and a column within that row to pinpoint an element. If multiple rows match the +// `column` x `keyInColumn` coordinate, a value is only returned for the first match. +// If no match is found or columns don't exist, `ok` is set to false. +func (df DataFrame) FindElem(colname string, keyInColumn interface{}, columnInRow string) (value series.Element, ok bool) { + // find column index for given `columnInRow` coordinate + cidx := findInStringSlice(columnInRow, df.Names()) + if cidx < 0 { + return value, false + } + // find row index for given `colname` and `keyInColumn` coordinates + c1idx := findInStringSlice(colname, df.Names()) + if c1idx < 0 { + return value, false + } + s := df.columns[c1idx] + ridx := -1 + for i := 0; i < s.Len(); i++ { + if s.Val(i) == keyInColumn { + ridx = i + break + } + } + if ridx < 0 { + return value, false + } + + return df.Elem(ridx, cidx), true +} + // Vector operators on arbitrary numeric columns // Applies `op` using elements from `lcolnm` and `rcolnm` as left and right operands, @@ -2108,11 +2139,6 @@ func floatOp(op interface{}, operands []float64) float64 { return acc } -// placeholders for infinity -// TODO prefer to handle errors with integer ops -const MaxUint = ^uint(0) -const MaxInt = int(MaxUint >> 1) - func intOp(op interface{}, operands []int) (int, error) { var acc int // accumulator for n-ary operators if len(operands) == 0 { From 2fed518c6059fad63a41debcd333ba775491d43b Mon Sep 17 00:00:00 2001 From: Daniel Cox Date: Thu, 24 Jun 2021 19:11:28 -0400 Subject: [PATCH 08/11] adding tests for FindElem --- dataframe/find_elem_test.go | 107 ++++++++++++++++++++++++++++++++++++ dataframe/math_test.go | 2 +- 2 files changed, 108 insertions(+), 1 deletion(-) create mode 100644 dataframe/find_elem_test.go diff --git a/dataframe/find_elem_test.go b/dataframe/find_elem_test.go new file mode 100644 index 0000000..4ebc2a2 --- /dev/null +++ b/dataframe/find_elem_test.go @@ -0,0 +1,107 @@ +package dataframe + +import ( + "testing" + + "github.com/go-gota/gota/series" +) + +func TestFindElem(t *testing.T) { + /* Input is a 5x4 DataFrame + + Strings Floats Primes Naturals + 0: e 2.718000 1 1 + 1: Pi 3.142000 3 2 + 2: Phi 1.618000 5 3 + 3: Sqrt2 1.414000 7 4 + 4: Ln2 0.693000 11 5 + + */ + df := New( + series.New([]string{"e", "Pi", "Phi", "Sqrt2", "Ln2"}, series.String, "Strings"), + series.New([]int{1, 3, 5, 7, 11}, series.Int, "Ints"), + series.New([]float64{2.718, 3.142, 1.618, 1.414, 0.693}, series.Float, "Floats"), + series.New([]bool{false, true, false, false, false}, series.Bool, "Bools"), + ) + + t.Run("String lookup of float value", func(t *testing.T) { + e, ok := df.FindElem("Strings", "Pi", "Floats") + if !ok { + t.Fatal("failed to find value") + } + observed := e.Float() + expected := 3.142 + if observed != expected { + t.Fatalf("values did not match - expected %f but got %f", expected, observed) + } + }) + + t.Run("Float lookup of string value", func(t *testing.T) { + e, ok := df.FindElem("Floats", 3.142, "Strings") + if !ok { + t.Fatal("failed to find value") + } + observed := e.String() + expected := "Pi" + if observed != expected { + t.Fatalf("values did not match - expected %s but got %s", expected, observed) + } + }) + + t.Run("Int lookup of bool value", func(t *testing.T) { + e, ok := df.FindElem("Ints", 3, "Bools") + if !ok { + t.Fatal("failed to find value") + } + observed, _ := e.Bool() + expected := true + if observed != expected { + t.Fatalf("values did not match - expected %t but got %t", expected, observed) + } + }) + + t.Run("Bool lookup of int value", func(t *testing.T) { + e, ok := df.FindElem("Bools", true, "Ints") + if !ok { + t.Fatal("failed to find value") + } + observed, _ := e.Int() + expected := 3 + if observed != expected { + t.Fatalf("values did not match - expected %d but got %d", expected, observed) + } + }) + + t.Run("Multiple matches returns first", func(t *testing.T) { + e, ok := df.FindElem("Bools", false, "Ints") + if !ok { + t.Fatal("failed to find value") + } + observed, _ := e.Int() + expected := 1 + if observed != expected { + t.Fatalf("values did not match - expected %d but got %d", expected, observed) + } + }) + + t.Run("First column not found sets ok to false", func(t *testing.T) { + _, ok := df.FindElem("Eentz", 11, "Strings") + if ok { + t.Fatal("expected ok false") + } + }) + + t.Run("Key not found sets ok to false", func(t *testing.T) { + _, ok := df.FindElem("Ints", 12, "Strings") + if ok { + t.Fatal("expected ok false") + } + }) + + t.Run("Second column not found sets ok to false", func(t *testing.T) { + _, ok := df.FindElem("Ints", 11, "Ropes") + if ok { + t.Fatal("expected ok false") + } + }) +} diff --git a/dataframe/math_test.go b/dataframe/math_test.go index 4b84436..67c9618 100644 --- a/dataframe/math_test.go +++ b/dataframe/math_test.go @@ -250,7 +250,7 @@ func TestMath(t *testing.T) { } -func TestErrors(t *testing.T) { +func TestMathErrors(t *testing.T) { expectError("at least one operand", func(df DataFrame) DataFrame { return df.Math("Empty operands", "+") }, t) From 05485a17dc7f523201a7df6e0231157796c5b49d Mon Sep 17 00:00:00 2001 From: Daniel Cox Date: Fri, 25 Jun 2021 12:41:29 -0400 Subject: [PATCH 09/11] documentation --- README.md | 26 +++++++++++++++++ dataframe/dataframe.go | 14 +++++---- dataframe/examples_test.go | 60 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 94 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 5b8bf55..7221f4e 100644 --- a/README.md +++ b/README.md @@ -264,6 +264,32 @@ df.Capply(mean) df.Rapply(mean) ``` +#### Math + +Element-wise arithmetic vector operations are available on `int` and +`float64` values through the `Math` method: + +```go +df := dataframe.New( + series.New([]string{"e", "Pi", "Phi", "Sqrt2", "Ln2"}, series.String, "Strings"), + series.New([]float64{2.718, 3.142, 1.618, 1.414, 0.693}, series.Float, "Floats"), + series.New([]int{1, 3, 5, 7, 11}, series.Int, "Primes"), + series.New([]int{1, 2, 3, 4, 5}, series.Int, "Naturals"), +) + +// Returns a new DataFrame with a column named "Diff" = Floats - Primes +withNewDiffColumn := df.Math("Diff", "-", "Floats", "Primes") +``` + +It is also possible for the second argument (the operator) to be a +function (unary, binary, or trinary) on `int` or `float64` (especially +useful with Go's `math` package): + +```go +import "math" +withNewFMACol := df.Math("FMA", math.FMA, "Floats", "Primes", "Naturals") +``` + #### Chaining operations DataFrames support a number of methods for wrangling the data, diff --git a/dataframe/dataframe.go b/dataframe/dataframe.go index 0b157f1..f9251ac 100644 --- a/dataframe/dataframe.go +++ b/dataframe/dataframe.go @@ -1972,11 +1972,11 @@ func (df DataFrame) Describe() DataFrame { return ddf } -// TODO tests -// Finds a specific element like `Elem`, but using a column and value to get the row, +// Finds a specific element (like `dataframe.Elem`), but using a column and value to get the row, // and a column within that row to pinpoint an element. If multiple rows match the // `column` x `keyInColumn` coordinate, a value is only returned for the first match. // If no match is found or columns don't exist, `ok` is set to false. +// Note that this function is slow for many rows. In the future this will be corrected by indexing. func (df DataFrame) FindElem(colname string, keyInColumn interface{}, columnInRow string) (value series.Element, ok bool) { // find column index for given `columnInRow` coordinate cidx := findInStringSlice(columnInRow, df.Names()) @@ -2003,10 +2003,12 @@ func (df DataFrame) FindElem(colname string, keyInColumn interface{}, columnInRo return df.Elem(ridx, cidx), true } -// Vector operators on arbitrary numeric columns - -// Applies `op` using elements from `lcolnm` and `rcolnm` as left and right operands, -// and stores the output in a new column `newcolnm`. +// Element-wise arithmetic vector operations on `int` and `float64` values. +// Applies `op` to the columns specified in operandcols, and stores the result +// in a new column named `resultcolnm`. +// `op` may be a string representing an arithmetic operator ("+", "-", "*", "/". also "%" on ints) +// or a unary, binary, or trinary function on `int` or `float`. +// Automatically coerces `int` to `float64` if necessary. func (df DataFrame) Math(resultcolnm string, op interface{}, operandcols ...string) DataFrame { if df.Err != nil { return df diff --git a/dataframe/examples_test.go b/dataframe/examples_test.go index 6687ea7..fd5593c 100644 --- a/dataframe/examples_test.go +++ b/dataframe/examples_test.go @@ -2,6 +2,7 @@ package dataframe_test import ( "fmt" + "math" "strings" "github.com/go-gota/gota/dataframe" @@ -266,3 +267,62 @@ func ExampleDataFrame_Describe() { ) fmt.Println(df.Describe()) } + +func ExampleDataFrame_FindElem() { + df := dataframe.New( + series.New([]string{"e", "Pi", "Phi", "Sqrt2", "Ln2"}, series.String, "Strings"), + series.New([]int{1, 3, 5, 7, 11}, series.Int, "Ints"), + series.New([]float64{2.718, 3.142, 1.618, 1.414, 0.693}, series.Float, "Floats"), + series.New([]bool{false, true, false, false, false}, series.Bool, "Bools"), + ) + + if f, ok := df.FindElem("Strings", "Pi", "Floats"); ok { + fmt.Printf("The value of Pi is %f\n", f.Float()) + } +} + +func ExampleDataFrame_Math() { + /* `input` is a 5x4 DataFrame: + + Strings Floats Primes Naturals + 0: e 2.718000 1 1 + 1: Pi 3.142000 3 2 + 2: Phi 1.618000 5 3 + 3: Sqrt2 1.414000 7 4 + 4: Ln2 0.693000 11 5 + + */ + df := dataframe.New( + series.New([]string{"e", "Pi", "Phi", "Sqrt2", "Ln2"}, series.String, "Strings"), + series.New([]float64{2.718, 3.142, 1.618, 1.414, 0.693}, series.Float, "Floats"), + series.New([]int{1, 3, 5, 7, 11}, series.Int, "Primes"), + series.New([]int{1, 2, 3, 4, 5}, series.Int, "Naturals"), + ) + + // `Math` takes a new column name, an operator (string or func) and at least one column name + withNewDiffColumn := df.Math("Diff", "-", "Floats", "Primes") + + // New `DataFrame` now has a column named "Diff" which is + // the result of subtracting Primes from Floats. + fmt.Println(withNewDiffColumn) + + /* + Strings Floats Primes Naturals Diff + 0: e 2.718000 1 1 1.718000 + 1: Pi 3.142000 3 2 0.142000 + 2: Phi 1.618000 5 3 -3.382000 + 3: Sqrt2 1.414000 7 4 -5.586000 + 4: Ln2 0.693000 11 5 -10.307000 + + */ + + // Also supports passing unary, binary, or trinary functions of + // int or float64, e.g., for functions from Go's `math` package. + // (Note here that `dataframe.Math` supports specifying many + // column names depending on the given operator, and also that + // it automatically coerces int to float64 when `op` is a + // function on float64.) + withNewFMACol := df.Math("FMA", math.FMA, "Floats", "Primes", "Naturals") + + fmt.Println(withNewFMACol) +} From 44283e0114e94114e631f486945b43d24afa6ace Mon Sep 17 00:00:00 2001 From: Daniel Cox Date: Fri, 25 Jun 2021 12:44:36 -0400 Subject: [PATCH 10/11] removing replace directive from go.mod for PR --- go.mod | 2 -- 1 file changed, 2 deletions(-) diff --git a/go.mod b/go.mod index 4a74e1f..f5b0250 100644 --- a/go.mod +++ b/go.mod @@ -3,5 +3,3 @@ module github.com/go-gota/gota go 1.16 require gonum.org/v1/gonum v0.9.1 - -replace github.com/go-gota/gota => github.com/greymatter-io/gota v0.10.2-0.20210623183612-03d5231a52dc From 0d7eb16c1f67006b6c16e83b3d24b3f2852b0487 Mon Sep 17 00:00:00 2001 From: Daniel Cox Date: Fri, 25 Jun 2021 15:30:18 -0400 Subject: [PATCH 11/11] adding Math and FindElem to changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index aad6416..136db38 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,8 @@ This project adheres to [Semantic Versioning](http://semver.org/). - Combining filters with AND - User-defined filters - Concatination of Dataframes +- Math for vector operations on multiple columns +- FindElem for content-based location of an element ### Changed - Make fixColnames faster