diff --git a/Makefile b/Makefile index fb374cb910..fe27b8dd30 100644 --- a/Makefile +++ b/Makefile @@ -11,6 +11,9 @@ build: @echo "Build complete. The Miller executable is ./mlr (or .\mlr.exe on Windows)." @echo "You can use 'make check' to run tests". +quiet: + @go build github.com/johnkerl/miller/cmd/mlr + # For interactive use, 'mlr regtest' offers more options and transparency. check: unit-test regression-test @echo "Tests complete. You can use 'make install' if you like, optionally preceded" diff --git a/docs/src/data/filenames.txt b/docs/src/data/filenames.txt new file mode 100644 index 0000000000..466fa4515c --- /dev/null +++ b/docs/src/data/filenames.txt @@ -0,0 +1,2 @@ +data/a.csv +data/b.csv diff --git a/docs/src/manpage.md b/docs/src/manpage.md index 3a60469192..a1cea93f14 100644 --- a/docs/src/manpage.md +++ b/docs/src/manpage.md @@ -178,6 +178,7 @@ MILLER(1) MILLER(1) mlr help mlrrc mlr help output-colorization mlr help type-arithmetic-info + mlr help type-arithmetic-info-extended Shorthands: mlr -g = mlr help flags mlr -l = mlr help list-verbs @@ -3648,5 +3649,5 @@ MILLER(1) MILLER(1) - 2023-11-12 MILLER(1) + 2023-12-02 MILLER(1) diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt index ef75697117..77ca6785f0 100644 --- a/docs/src/manpage.txt +++ b/docs/src/manpage.txt @@ -157,6 +157,7 @@ MILLER(1) MILLER(1) mlr help mlrrc mlr help output-colorization mlr help type-arithmetic-info + mlr help type-arithmetic-info-extended Shorthands: mlr -g = mlr help flags mlr -l = mlr help list-verbs @@ -3627,4 +3628,4 @@ MILLER(1) MILLER(1) - 2023-11-12 MILLER(1) + 2023-12-02 MILLER(1) diff --git a/docs/src/online-help.md b/docs/src/online-help.md index 8318c4f0a5..5bbee15a1b 100644 --- a/docs/src/online-help.md +++ b/docs/src/online-help.md @@ -86,6 +86,7 @@ Other: mlr help mlrrc mlr help output-colorization mlr help type-arithmetic-info + mlr help type-arithmetic-info-extended Shorthands: mlr -g = mlr help flags mlr -l = mlr help list-verbs diff --git 
a/docs/src/record-heterogeneity.md b/docs/src/record-heterogeneity.md index 5794fc49a3..d02a524482 100644 --- a/docs/src/record-heterogeneity.md +++ b/docs/src/record-heterogeneity.md @@ -154,8 +154,7 @@ with 1) for too-long rows: }, { "a": 4, - "b": 5, - "c": "" + "b": 5 }, { "a": 7, @@ -455,7 +454,9 @@ Miller handles explicit header changes as just shown. If your CSV input contains
a,b,c 1,2,3 -4,5, + +a,b +4,5 a,b,c,4 7,8,9,10 diff --git a/docs/src/reference-main-null-data.md b/docs/src/reference-main-null-data.md index 0de7c67c91..63bfffaa93 100644 --- a/docs/src/reference-main-null-data.md +++ b/docs/src/reference-main-null-data.md @@ -239,17 +239,44 @@ resource=/some/other/path,loadsec=0.97,ok=false,loadmillis=970 ## Arithmetic rules -If you're interested in a formal description of how empty and absent fields participate in arithmetic, here's a table for plus (other arithmetic/boolean/bitwise operators are similar): +If you're interested in a formal description of how empty and absent fields participate in arithmetic, here's a table for `+`, `&&`, and `||`. Notes: + +* Other arithmetic, boolean, and bitwise operators besides `&&` and `||` are similar to `+`. +* The `&&` and `||` operators obey _short-circuiting semantics_. That is: + * `false && X` is `false` and `X` is not evaluated even if it is a complex expression (maybe including function calls) + * `true || X` is `true` and `X` is not evaluated even if it is a complex expression (maybe including function calls) +* This means in particular that: + * `false && X` is false even if `X` is an error, a non-boolean type, etc. 
+ * `true || X` is true even if `X` is an error, a non-boolean type, etc.-mlr help type-arithmetic-info +mlr help type-arithmetic-info-extended-(+) | 1 2.5 (empty) (absent) (error) ------- + ------ ------ ------ ------ ------ -1 | 2 3.5 1 1 (error) -2.5 | 3.5 5 2.5 2.5 (error) -(empty) | 1 2.5 (empty) (absent) (error) -(absent) | 1 2.5 (absent) (absent) (error) -(error) | (error) (error) (error) (error) (error) +(+) | 1 2.5 true (empty) (absent) (error) +------ + ------ ------ ------ ------ ------ ------ +1 | 2 3.5 (error) 1 1 (error) +2.5 | 3.5 5 (error) 2.5 2.5 (error) +true | (error) (error) (error) (error) (error) (error) +(empty) | 1 2.5 (error) (empty) (absent) (error) +(absent) | 1 2.5 (error) (absent) (absent) (error) +(error) | (error) (error) (error) (error) (error) (error) + +(&&) | true false 3 (empty) (absent) (error) +------ + ------ ------ ------ ------ ------ ------ +true | true false (error) (error) (absent) (error) +false | false false false false false false +3 | (error) (error) (error) (error) (absent) (error) +(empty) | true false (error) (error) (absent) (error) +(absent) | true false (error) (absent) (absent) (error) +(error) | (error) (error) (error) (error) (error) (error) + +(||) | true false 3 (empty) (absent) (error) +------ + ------ ------ ------ ------ ------ ------ +true | true true true true true true +false | true false (error) (error) (absent) (error) +3 | (error) (error) (error) (error) (absent) (error) +(empty) | true false (error) (error) (absent) (error) +(absent) | true false (error) (absent) (absent) (error) +(error) | (error) (error) (error) (error) (error) (error)diff --git a/docs/src/reference-main-null-data.md.in b/docs/src/reference-main-null-data.md.in index 381a46522b..087edaa788 100644 --- a/docs/src/reference-main-null-data.md.in +++ b/docs/src/reference-main-null-data.md.in @@ -119,8 +119,16 @@ GENMD-EOF ## Arithmetic rules -If you're interested in a formal description of how empty and absent fields participate in 
arithmetic, here's a table for plus (other arithmetic/boolean/bitwise operators are similar): +If you're interested in a formal description of how empty and absent fields participate in arithmetic, here's a table for `+`, `&&`, and `||`. Notes: + +* Other arithmetic, boolean, and bitwise operators besides `&&` and `||` are similar to `+`. +* The `&&` and `||` operators obey _short-circuiting semantics_. That is: + * `false && X` is `false` and `X` is not evaluated even if it is a complex expression (maybe including function calls) + * `true || X` is `true` and `X` is not evaluated even if it is a complex expression (maybe including function calls) +* This means in particular that: + * `false && X` is false even if `X` is an error, a non-boolean type, etc. + * `true || X` is true even if `X` is an error, a non-boolean type, etc. GENMD-RUN-COMMAND -mlr help type-arithmetic-info +mlr help type-arithmetic-info-extended GENMD-EOF diff --git a/man/manpage.txt b/man/manpage.txt index ef75697117..77ca6785f0 100644 --- a/man/manpage.txt +++ b/man/manpage.txt @@ -157,6 +157,7 @@ MILLER(1) MILLER(1) mlr help mlrrc mlr help output-colorization mlr help type-arithmetic-info + mlr help type-arithmetic-info-extended Shorthands: mlr -g = mlr help flags mlr -l = mlr help list-verbs @@ -3627,4 +3628,4 @@ MILLER(1) MILLER(1) - 2023-11-12 MILLER(1) + 2023-12-02 MILLER(1) diff --git a/man/mlr.1 b/man/mlr.1 index c61b2cb67e..6bf4d493d3 100644 --- a/man/mlr.1 +++ b/man/mlr.1 @@ -2,12 +2,12 @@ .\" Title: mlr .\" Author: [see the "AUTHOR" section] .\" Generator: ./mkman.rb -.\" Date: 2023-11-11 +.\" Date: 2023-12-02 .\" Manual: \ \& .\" Source: \ \& .\" Language: English .\" -.TH "MILLER" "1" "2023-11-11" "\ \&" "\ \&" +.TH "MILLER" "1" "2023-12-02" "\ \&" "\ \&" .\" ----------------------------------------------------------------- .\" * Portability definitions .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -192,6 +192,7 @@ Other: mlr help mlrrc mlr help output-colorization 
mlr help type-arithmetic-info + mlr help type-arithmetic-info-extended Shorthands: mlr -g = mlr help flags mlr -l = mlr help list-verbs diff --git a/pkg/dsl/cst/builtin_functions.go b/pkg/dsl/cst/builtin_functions.go index 26f1daccee..397e7869cd 100644 --- a/pkg/dsl/cst/builtin_functions.go +++ b/pkg/dsl/cst/builtin_functions.go @@ -32,7 +32,7 @@ func (root *RootNode) BuildBuiltinFunctionCallsiteNode( if builtinFunctionInfo.hasMultipleArities { // E.g. "+" and "-" return root.BuildMultipleArityFunctionCallsiteNode(astNode, builtinFunctionInfo) } else if builtinFunctionInfo.zaryFunc != nil { - return root.BuildZaryFunctionCallsiteNode(astNode, builtinFunctionInfo) + return BuildZaryFunctionCallsiteNode(astNode, builtinFunctionInfo) } else if builtinFunctionInfo.unaryFunc != nil { return root.BuildUnaryFunctionCallsiteNode(astNode, builtinFunctionInfo) } else if builtinFunctionInfo.unaryFuncWithContext != nil { @@ -89,7 +89,7 @@ type ZaryFunctionCallsiteNode struct { zaryFunc bifs.ZaryFunc } -func (root *RootNode) BuildZaryFunctionCallsiteNode( +func BuildZaryFunctionCallsiteNode( astNode *dsl.ASTNode, builtinFunctionInfo *BuiltinFunctionInfo, ) (IEvaluable, error) { @@ -228,25 +228,25 @@ func (root *RootNode) BuildBinaryFunctionCallsiteNode( // Special short-circuiting cases if builtinFunctionInfo.name == "&&" { - return root.BuildLogicalANDOperatorNode( + return BuildLogicalANDOperatorNode( evaluable1, evaluable2, ), nil } if builtinFunctionInfo.name == "||" { - return root.BuildLogicalOROperatorNode( + return BuildLogicalOROperatorNode( evaluable1, evaluable2, ), nil } if builtinFunctionInfo.name == "??" { - return root.BuildAbsentCoalesceOperatorNode( + return BuildAbsentCoalesceOperatorNode( evaluable1, evaluable2, ), nil } if builtinFunctionInfo.name == "???" 
{ - return root.BuildEmptyCoalesceOperatorNode( + return BuildEmptyCoalesceOperatorNode( evaluable1, evaluable2, ), nil @@ -557,7 +557,7 @@ func (root *RootNode) BuildTernaryFunctionCallsiteNode( // Special short-circuiting case if builtinFunctionInfo.name == "?:" { - return root.BuildStandardTernaryOperatorNode( + return BuildStandardTernaryOperatorNode( evaluable1, evaluable2, evaluable3, @@ -703,7 +703,7 @@ type LogicalANDOperatorNode struct { a, b IEvaluable } -func (root *RootNode) BuildLogicalANDOperatorNode(a, b IEvaluable) *LogicalANDOperatorNode { +func BuildLogicalANDOperatorNode(a, b IEvaluable) *LogicalANDOperatorNode { return &LogicalANDOperatorNode{ a: a, b: b, @@ -712,53 +712,74 @@ func (root *RootNode) BuildLogicalANDOperatorNode(a, b IEvaluable) *LogicalANDOp // This is different from most of the evaluator functions in that it does // short-circuiting: since is logical AND, the second argument is not evaluated -// if the first argument is false. -// -// Disposition matrix: -// -// { -//a b ERROR ABSENT EMPTY STRING INT FLOAT BOOL -//ERROR : {ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR}, -//ABSENT : {ERROR, absent, ERROR, ERROR, ERROR, ERROR, absent}, -//EMPTY : {ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR}, -//STRING : {ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR}, -//INT : {ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR}, -//FLOAT : {ERROR, ERROR, ERROR, ERROR, ERROR, ERROR, ERROR}, -//BOOL : {ERROR, absent, ERROR, ERROR, ERROR, ERROR, a&&b}, -// } -// -// which without the all-error rows/columns reduces to -// -// { -//a b ABSENT BOOL -//ABSENT : {absent, absent}, -//BOOL : {absent, a&&b}, -// } +// if the first argument is false. Thus we cannot use disposition matrices. 
// -// So: -// * Evaluate a -// * If a is not absent or bool: return error -// * If a is absent: return absent -// * If a is false: return a -// * Now a is boolean true -// * Evaluate b -// * If b is not absent or bool: return error -// * If b is absent: return absent -// * Return a && b +// * evaluate a +// * if a is error: +// * return a +// * elif a is absent: +// * evaluate b +// * if b is error: return error +// * elif b is empty or absent: return absent +// * elif b is not boolean: return error +// * else: return b +// * elif a is empty: +// * evaluate b +// * if b is error: return error +// * elif b is empty: return error +// * elif b is absent: return absent +// * else: return b if b is boolean, else error +// * else: +// * return the BIF (using its disposition matrix) + +// mlr help type-arithmetic-info-extended | lumin -c red .error. | lumin -c blue .absent. | lumin -c green .empty. func (node *LogicalANDOperatorNode) Evaluate( state *runtime.State, ) *mlrval.Mlrval { aout := node.a.Evaluate(state) atype := aout.Type() - if !(atype == mlrval.MT_ABSENT || atype == mlrval.MT_BOOL) { - return mlrval.FromNotNamedTypeError("&&", aout, "absent or boolean") + + if atype == mlrval.MT_ERROR { + return aout } + if atype == mlrval.MT_ABSENT { - return mlrval.ABSENT + bout := node.b.Evaluate(state) + btype := bout.Type() + if btype == mlrval.MT_ERROR { + return bout + } + if btype == mlrval.MT_VOID || btype == mlrval.MT_ABSENT { + return mlrval.ABSENT + } + if btype != mlrval.MT_BOOL { + return mlrval.FromNotNamedTypeError("&&", bout, "absent or boolean") + } + return bout } + + if atype == mlrval.MT_VOID { + bout := node.b.Evaluate(state) + btype := bout.Type() + if btype == mlrval.MT_ERROR { + return bout + } + if btype == mlrval.MT_VOID { + return mlrval.FromNotNamedTypeError("&&", bout, "absent or boolean") + } + if btype == mlrval.MT_ABSENT { + return mlrval.ABSENT + } + if btype != mlrval.MT_BOOL { + return mlrval.FromNotNamedTypeError("&&", bout, "absent or boolean") + } + return 
bout + } + + // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - if aout.IsFalse() { - // This means false && bogus type evaluates to true, which is sad but + // This means false && bogus type evaluates to false, which is sad but // which we MUST do in order to not violate the short-circuiting // property. We would have to evaluate b to know if it were error or // not. @@ -782,7 +803,7 @@ type LogicalOROperatorNode struct { a, b IEvaluable } -func (root *RootNode) BuildLogicalOROperatorNode(a, b IEvaluable) *LogicalOROperatorNode { +func BuildLogicalOROperatorNode(a, b IEvaluable) *LogicalOROperatorNode { return &LogicalOROperatorNode{ a: a, b: b, @@ -792,19 +813,54 @@ func (root *RootNode) BuildLogicalOROperatorNode(a, b IEvaluable) *LogicalOROper // This is different from most of the evaluator functions in that it does // short-circuiting: since this is logical OR, the second argument is not evaluated // if the first argument is true. -// -// See the disposition-matrix discussion for LogicalANDOperator. 
+ func (node *LogicalOROperatorNode) Evaluate( state *runtime.State, ) *mlrval.Mlrval { aout := node.a.Evaluate(state) atype := aout.Type() - if !(atype == mlrval.MT_ABSENT || atype == mlrval.MT_BOOL) { - return mlrval.FromNotNamedTypeError("||", aout, "absent or boolean") + + if atype == mlrval.MT_ERROR { + return aout } + if atype == mlrval.MT_ABSENT { - return mlrval.ABSENT + bout := node.b.Evaluate(state) + btype := bout.Type() + if btype == mlrval.MT_ERROR { + return bout + } + if btype == mlrval.MT_VOID || btype == mlrval.MT_ABSENT { + return mlrval.ABSENT + } + if btype == mlrval.MT_VOID { + return mlrval.FromNotNamedTypeError("||", bout, "absent or boolean") + } + if btype != mlrval.MT_BOOL { + return mlrval.FromNotNamedTypeError("||", bout, "absent or boolean") + } + return bout } + + if atype == mlrval.MT_VOID { + bout := node.b.Evaluate(state) + btype := bout.Type() + if btype == mlrval.MT_ERROR { + return bout + } + if btype == mlrval.MT_VOID { + return mlrval.FromNotNamedTypeError("||", bout, "absent or boolean") + } + if btype == mlrval.MT_ABSENT { + return mlrval.ABSENT + } + if btype != mlrval.MT_BOOL { + return mlrval.FromNotNamedTypeError("||", bout, "absent or boolean") + } + return bout + } + + // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - if aout.IsTrue() { // This means true || bogus type evaluates to true, which is sad but // which we MUST do in order to not violate the short-circuiting @@ -821,6 +877,7 @@ func (node *LogicalOROperatorNode) Evaluate( if btype == mlrval.MT_ABSENT { return mlrval.ABSENT } + return bifs.BIF_logical_OR(aout, bout) } @@ -829,7 +886,7 @@ func (node *LogicalOROperatorNode) Evaluate( // current record has no field $foo. 
type AbsentCoalesceOperatorNode struct{ a, b IEvaluable } -func (root *RootNode) BuildAbsentCoalesceOperatorNode(a, b IEvaluable) *AbsentCoalesceOperatorNode { +func BuildAbsentCoalesceOperatorNode(a, b IEvaluable) *AbsentCoalesceOperatorNode { return &AbsentCoalesceOperatorNode{a: a, b: b} } @@ -852,7 +909,7 @@ func (node *AbsentCoalesceOperatorNode) Evaluate( // when the current record has no field $foo, or when $foo is empty.. type EmptyCoalesceOperatorNode struct{ a, b IEvaluable } -func (root *RootNode) BuildEmptyCoalesceOperatorNode(a, b IEvaluable) *EmptyCoalesceOperatorNode { +func BuildEmptyCoalesceOperatorNode(a, b IEvaluable) *EmptyCoalesceOperatorNode { return &EmptyCoalesceOperatorNode{a: a, b: b} } @@ -874,7 +931,7 @@ func (node *EmptyCoalesceOperatorNode) Evaluate( // ================================================================ type StandardTernaryOperatorNode struct{ a, b, c IEvaluable } -func (root *RootNode) BuildStandardTernaryOperatorNode(a, b, c IEvaluable) *StandardTernaryOperatorNode { +func BuildStandardTernaryOperatorNode(a, b, c IEvaluable) *StandardTernaryOperatorNode { return &StandardTernaryOperatorNode{a: a, b: b, c: c} } func (node *StandardTernaryOperatorNode) Evaluate( diff --git a/pkg/dsl/cst/leaves.go b/pkg/dsl/cst/leaves.go index 382525e524..08b3200a98 100644 --- a/pkg/dsl/cst/leaves.go +++ b/pkg/dsl/cst/leaves.go @@ -375,6 +375,24 @@ func (node *NullLiteralNode) Evaluate( return node.literal } +// ---------------------------------------------------------------- +// Used for testing purposes; not used by the main DSL. 
+ +type MlrvalLiteralNode struct { + literal *mlrval.Mlrval +} + +func BuildMlrvalLiteralNode(literal *mlrval.Mlrval) *MlrvalLiteralNode { + return &MlrvalLiteralNode{ + literal: literal.Copy(), + } +} +func (node *MlrvalLiteralNode) Evaluate( + state *runtime.State, +) *mlrval.Mlrval { + return node.literal +} + // ================================================================ func (root *RootNode) BuildContextVariableNode(astNode *dsl.ASTNode) (IEvaluable, error) { lib.InternalCodingErrorIf(astNode.Token == nil) diff --git a/pkg/terminals/help/entry.go b/pkg/terminals/help/entry.go index 3b25b58b52..a9148c385b 100644 --- a/pkg/terminals/help/entry.go +++ b/pkg/terminals/help/entry.go @@ -16,6 +16,7 @@ import ( "github.com/johnkerl/miller/pkg/dsl/cst" "github.com/johnkerl/miller/pkg/lib" "github.com/johnkerl/miller/pkg/mlrval" + "github.com/johnkerl/miller/pkg/runtime" "github.com/johnkerl/miller/pkg/transformers" ) @@ -114,6 +115,7 @@ func init() { {name: "mlrrc", zaryHandlerFunc: helpMlrrc}, {name: "output-colorization", zaryHandlerFunc: helpOutputColorization}, {name: "type-arithmetic-info", zaryHandlerFunc: helpTypeArithmeticInfo}, + {name: "type-arithmetic-info-extended", zaryHandlerFunc: helpTypeArithmeticInfoExtended}, }, }, { @@ -483,9 +485,18 @@ func helpOutputColorization() { // ---------------------------------------------------------------- func helpTypeArithmeticInfo() { + helpTypeArithmeticInfoAux(false) +} + +func helpTypeArithmeticInfoExtended() { + helpTypeArithmeticInfoAux(true) +} + +func helpTypeArithmeticInfoAux(extended bool) { mlrvals := []*mlrval.Mlrval{ mlrval.FromInt(1), mlrval.FromFloat(2.5), + mlrval.FromBool(true), mlrval.VOID, mlrval.ABSENT, mlrval.FromAnonymousError(), @@ -524,6 +535,70 @@ func helpTypeArithmeticInfo() { fmt.Println() } + if !extended { + return + } + + mlrvals = []*mlrval.Mlrval{ + mlrval.FromBool(true), + mlrval.FromBool(false), + mlrval.FromInt(3), + mlrval.VOID, + mlrval.ABSENT, + mlrval.FromAnonymousError(), + 
} + + n = len(mlrvals) + + state := runtime.NewEmptyState(cli.DefaultOptions(), false) + + descs := []string{"(&&)", "(||)"} + for k, desc := range descs { + + fmt.Println() + for i := -2; i < n; i++ { + if i == -2 { + fmt.Printf("%-10s |", desc) + } else if i == -1 { + fmt.Printf("%-10s +", "------") + } else if mlrvals[i].IsVoid() { + fmt.Printf("%-10s |", "(empty)") + } else { + fmt.Printf("%-10s |", mlrvals[i].String()) + } + for j := 0; j < n; j++ { + if i == -2 { + if mlrvals[j].IsVoid() { + fmt.Printf("%-10s", "(empty)") + } else { + fmt.Printf(" %-10s", mlrvals[j].String()) + } + } else if i == -1 { + fmt.Printf(" %-10s", "------") + } else { + + inode := cst.BuildMlrvalLiteralNode(mlrvals[i]) + jnode := cst.BuildMlrvalLiteralNode(mlrvals[j]) + + var binary_node cst.IEvaluable + if k == 0 { + binary_node = cst.BuildLogicalANDOperatorNode(inode, jnode) + } else { + binary_node = cst.BuildLogicalOROperatorNode(inode, jnode) + } + + output := binary_node.Evaluate(state) + + if output.IsVoid() { + fmt.Printf(" %-10s", "(empty)") + } else { + fmt.Printf(" %-10s", output.String()) + } + } + } + fmt.Println() + } + } } // ----------------------------------------------------------------