From 30f26b70e00fd094179c76a95952cd460f8ff07c Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sun, 18 Feb 2024 13:48:42 -0500 Subject: [PATCH 1/2] Support thousands separator in `fmtnum` --- pkg/dsl/cst/builtin_function_manager.go | 9 ++- pkg/mlrval/mlrval_format.go | 93 ++++++++++++++++++++++++- 2 files changed, 98 insertions(+), 4 deletions(-) diff --git a/pkg/dsl/cst/builtin_function_manager.go b/pkg/dsl/cst/builtin_function_manager.go index 965c9529bf..90f854fe01 100644 --- a/pkg/dsl/cst/builtin_function_manager.go +++ b/pkg/dsl/cst/builtin_function_manager.go @@ -2000,10 +2000,15 @@ Note that NaN has the property that NaN != NaN, so you need 'is_nan(x)' rather t name: "fmtnum", class: FUNC_CLASS_CONVERSION, help: `Convert int/float/bool to string using printf-style format string (https://pkg.go.dev/fmt), e.g. -'$s = fmtnum($n, "%08d")' or '$t = fmtnum($n, "%.6e")'. This function recurses on array and map values.`, +'$s = fmtnum($n, "%08d")' or '$t = fmtnum($n, "%.6e")'. Miller-specific extension: "%_d" and "%_f" for comma-separated thousands. This function recurses on array and map values.`, binaryFunc: bifs.BIF_fmtnum, examples: []string{ - `$x = fmtnum($x, "%.6f")`, + `$y = fmtnum($x, "%.6f")`, + `$o = fmtnum($n, "%d")`, + `$o = fmtnum($n, "%12d")`, + `$y = fmtnum($x, "%.6_f")`, + `$o = fmtnum($n, "%_d")`, + `$o = fmtnum($n, "%12_d")`, }, }, diff --git a/pkg/mlrval/mlrval_format.go b/pkg/mlrval/mlrval_format.go index 9f4cb2fe6e..661cad1539 100644 --- a/pkg/mlrval/mlrval_format.go +++ b/pkg/mlrval/mlrval_format.go @@ -2,8 +2,12 @@ package mlrval import ( "fmt" + "os" "strconv" "strings" + + "golang.org/x/text/language" + "golang.org/x/text/message" ) //---------------------------------------------------------------- @@ -103,9 +107,14 @@ func newFormatter( goFormatString = strings.ReplaceAll(goFormatString, "le", "e") goFormatString = strings.ReplaceAll(goFormatString, "lg", "g") - // MIller 5 and below required C format strings compatible with 64-bit ints + // Miller 5 and below required C format strings compatible with 64-bit ints // and double-precision floats: e.g. "%08lld" and "%9.6lf". For Miller 6, - // We must still accept these for backward compatibility. + // we must still accept these for backward compatibility. + if strings.HasSuffix(goFormatString, "_d") { + // Special sub-case of "d"; must be checked first + n := len(goFormatString) + return newFormatterToSeparatedInt(goFormatString[:n-2] + "d"), nil + } if strings.HasSuffix(goFormatString, "d") { return newFormatterToInt(goFormatString), nil } @@ -113,6 +122,11 @@ func newFormatter( return newFormatterToInt(goFormatString), nil } + if strings.HasSuffix(goFormatString, "_f") { + // Special sub-case of "f"; must be checked first + n := len(goFormatString) + return newFormatterToSeparatedFloat(goFormatString[:n-2] + "f"), nil + } if strings.HasSuffix(goFormatString, "f") { return newFormatterToFloat(goFormatString), nil } @@ -164,6 +178,81 @@ func (formatter *formatterToFloat) FormatFloat(floatValue float64) string { // ---------------------------------------------------------------- +func getLanguageTag() language.Tag { + v, ok := os.LookupEnv("LANG") + if ok { + return language.Make(v) + } else { + return language.Make("en") + } +} + +// ---------------------------------------------------------------- + +type formatterToSeparatedInt struct { + goFormatString string + printer *message.Printer +} + +func newFormatterToSeparatedInt(goFormatString string) IFormatter { + return &formatterToSeparatedInt{ + goFormatString: goFormatString, + printer: message.NewPrinter(getLanguageTag()), + } +} + +func (formatter *formatterToSeparatedInt) Format(mv *Mlrval) *Mlrval { + intValue, isInt := mv.GetIntValue() + if isInt { + formatted := formatter.printer.Sprintf(formatter.goFormatString, intValue) + return TryFromIntString(formatted) + } + floatValue, isFloat := mv.GetFloatValue() + if isFloat { + formatted := formatter.printer.Sprintf(formatter.goFormatString, int(floatValue)) + return TryFromIntString(formatted) + } + return mv +} + +func (formatter *formatterToSeparatedInt) FormatFloat(floatValue float64) string { + return formatter.printer.Sprintf(formatter.goFormatString, int(floatValue)) +} + +// ---------------------------------------------------------------- + +type formatterToSeparatedFloat struct { + goFormatString string + printer *message.Printer +} + +func newFormatterToSeparatedFloat(goFormatString string) IFormatter { + return &formatterToSeparatedFloat{ + goFormatString: goFormatString, + printer: message.NewPrinter(getLanguageTag()), + } +} + +func (formatter *formatterToSeparatedFloat) Format(mv *Mlrval) *Mlrval { + floatValue, isFloat := mv.GetFloatValue() + if isFloat { + formatted := formatter.printer.Sprintf(formatter.goFormatString, floatValue) + return TryFromFloatString(formatted) + } + intValue, isInt := mv.GetIntValue() + if isInt { + formatted := formatter.printer.Sprintf(formatter.goFormatString, float64(intValue)) + return TryFromFloatString(formatted) + } + return mv +} + +func (formatter *formatterToSeparatedFloat) FormatFloat(floatValue float64) string { + return formatter.printer.Sprintf(formatter.goFormatString, floatValue) +} + +// ---------------------------------------------------------------- + type formatterToInt struct { goFormatString string } From 73ecb0a03587e30d1242cd4dc74ae3dcbe10fd74 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sun, 18 Feb 2024 13:51:37 -0500 Subject: [PATCH 2/2] doc bits --- docs/src/manpage.md | 11 ++++++++--- docs/src/manpage.txt | 11 ++++++++--- docs/src/reference-dsl-builtin-functions.md | 11 ++++++++--- man/manpage.txt | 11 ++++++++--- man/mlr.1 | 11 ++++++++--- 5 files changed, 40 insertions(+), 15 deletions(-) diff --git a/docs/src/manpage.md b/docs/src/manpage.md index 2d7935bd6c..4b80cdeeee 100644 --- a/docs/src/manpage.md +++ b/docs/src/manpage.md @@ -2416,9 +2416,14 @@ This is simply a copy of what you should see on running `man mlr` at a command p $* = fmtifnum($*, "%.6f") formats numeric fields in the current record, leaving non-numeric ones alone 1mfmtnum0m - (class=conversion #args=2) Convert int/float/bool to string using printf-style format string (https://pkg.go.dev/fmt), e.g. '$s = fmtnum($n, "%08d")' or '$t = fmtnum($n, "%.6e")'. This function recurses on array and map values. - Example: - $x = fmtnum($x, "%.6f") + (class=conversion #args=2) Convert int/float/bool to string using printf-style format string (https://pkg.go.dev/fmt), e.g. '$s = fmtnum($n, "%08d")' or '$t = fmtnum($n, "%.6e")'. Miller-specific extension: "%_d" and "%_f" for comma-separated thousands. This function recurses on array and map values. + Examples: + $y = fmtnum($x, "%.6f") + $o = fmtnum($n, "%d") + $o = fmtnum($n, "%12d") + $y = fmtnum($x, "%.6_f") + $o = fmtnum($n, "%_d") + $o = fmtnum($n, "%12_d") 1mfold0m (class=higher-order-functions #args=3) Given a map or array as first argument and a function as second argument, accumulates entries into a final output -- for example, sum or product. For arrays, the function should take two arguments, for accumulated value and array element. For maps, it should take four arguments, for accumulated key and value, and map-element key and value; it should return the updated accumulator as a new key-value pair (i.e. a single-entry map). The start value for the accumulator is taken from the third argument. diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt index 151b0fc334..15c9d6232b 100644 --- a/docs/src/manpage.txt +++ b/docs/src/manpage.txt @@ -2395,9 +2395,14 @@ $* = fmtifnum($*, "%.6f") formats numeric fields in the current record, leaving non-numeric ones alone 1mfmtnum0m - (class=conversion #args=2) Convert int/float/bool to string using printf-style format string (https://pkg.go.dev/fmt), e.g. '$s = fmtnum($n, "%08d")' or '$t = fmtnum($n, "%.6e")'. This function recurses on array and map values. - Example: - $x = fmtnum($x, "%.6f") + (class=conversion #args=2) Convert int/float/bool to string using printf-style format string (https://pkg.go.dev/fmt), e.g. '$s = fmtnum($n, "%08d")' or '$t = fmtnum($n, "%.6e")'. Miller-specific extension: "%_d" and "%_f" for comma-separated thousands. This function recurses on array and map values. + Examples: + $y = fmtnum($x, "%.6f") + $o = fmtnum($n, "%d") + $o = fmtnum($n, "%12d") + $y = fmtnum($x, "%.6_f") + $o = fmtnum($n, "%_d") + $o = fmtnum($n, "%12_d") 1mfold0m (class=higher-order-functions #args=3) Given a map or array as first argument and a function as second argument, accumulates entries into a final output -- for example, sum or product. For arrays, the function should take two arguments, for accumulated value and array element. For maps, it should take four arguments, for accumulated key and value, and map-element key and value; it should return the updated accumulator as a new key-value pair (i.e. a single-entry map). The start value for the accumulator is taken from the third argument. diff --git a/docs/src/reference-dsl-builtin-functions.md b/docs/src/reference-dsl-builtin-functions.md index 3a55821f3f..dbfafc04f8 100644 --- a/docs/src/reference-dsl-builtin-functions.md +++ b/docs/src/reference-dsl-builtin-functions.md @@ -534,9 +534,14 @@ $* = fmtifnum($*, "%.6f") formats numeric fields in the current record, leaving ### fmtnum
-fmtnum  (class=conversion #args=2) Convert int/float/bool to string using printf-style format string (https://pkg.go.dev/fmt), e.g. '$s = fmtnum($n, "%08d")' or '$t = fmtnum($n, "%.6e")'. This function recurses on array and map values.
-Example:
-$x = fmtnum($x, "%.6f")
+fmtnum  (class=conversion #args=2) Convert int/float/bool to string using printf-style format string (https://pkg.go.dev/fmt), e.g. '$s = fmtnum($n, "%08d")' or '$t = fmtnum($n, "%.6e")'. Miller-specific extension: "%_d" and "%_f" for comma-separated thousands. This function recurses on array and map values.
+Examples:
+$y = fmtnum($x, "%.6f")
+$o = fmtnum($n, "%d")
+$o = fmtnum($n, "%12d")
+$y = fmtnum($x, "%.6_f")
+$o = fmtnum($n, "%_d")
+$o = fmtnum($n, "%12_d")
 
diff --git a/man/manpage.txt b/man/manpage.txt index 151b0fc334..15c9d6232b 100644 --- a/man/manpage.txt +++ b/man/manpage.txt @@ -2395,9 +2395,14 @@ $* = fmtifnum($*, "%.6f") formats numeric fields in the current record, leaving non-numeric ones alone 1mfmtnum0m - (class=conversion #args=2) Convert int/float/bool to string using printf-style format string (https://pkg.go.dev/fmt), e.g. '$s = fmtnum($n, "%08d")' or '$t = fmtnum($n, "%.6e")'. This function recurses on array and map values. - Example: - $x = fmtnum($x, "%.6f") + (class=conversion #args=2) Convert int/float/bool to string using printf-style format string (https://pkg.go.dev/fmt), e.g. '$s = fmtnum($n, "%08d")' or '$t = fmtnum($n, "%.6e")'. Miller-specific extension: "%_d" and "%_f" for comma-separated thousands. This function recurses on array and map values. + Examples: + $y = fmtnum($x, "%.6f") + $o = fmtnum($n, "%d") + $o = fmtnum($n, "%12d") + $y = fmtnum($x, "%.6_f") + $o = fmtnum($n, "%_d") + $o = fmtnum($n, "%12_d") 1mfold0m (class=higher-order-functions #args=3) Given a map or array as first argument and a function as second argument, accumulates entries into a final output -- for example, sum or product. For arrays, the function should take two arguments, for accumulated value and array element. For maps, it should take four arguments, for accumulated key and value, and map-element key and value; it should return the updated accumulator as a new key-value pair (i.e. a single-entry map). The start value for the accumulator is taken from the third argument. diff --git a/man/mlr.1 b/man/mlr.1 index 3d5c75b4ee..505cb8d082 100644 --- a/man/mlr.1 +++ b/man/mlr.1 @@ -3338,9 +3338,14 @@ $* = fmtifnum($*, "%.6f") formats numeric fields in the current record, leaving .RS 0 .\} .nf - (class=conversion #args=2) Convert int/float/bool to string using printf-style format string (https://pkg.go.dev/fmt), e.g. '$s = fmtnum($n, "%08d")' or '$t = fmtnum($n, "%.6e")'. This function recurses on array and map values. -Example: -$x = fmtnum($x, "%.6f") + (class=conversion #args=2) Convert int/float/bool to string using printf-style format string (https://pkg.go.dev/fmt), e.g. '$s = fmtnum($n, "%08d")' or '$t = fmtnum($n, "%.6e")'. Miller-specific extension: "%_d" and "%_f" for comma-separated thousands. This function recurses on array and map values. +Examples: +$y = fmtnum($x, "%.6f") +$o = fmtnum($n, "%d") +$o = fmtnum($n, "%12d") +$y = fmtnum($x, "%.6_f") +$o = fmtnum($n, "%_d") +$o = fmtnum($n, "%12_d") .fi .if n \{\ .RE