From f9e325636796e05aad1170fb3014715fc75ce040 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Fri, 18 Mar 2022 23:48:27 -0400 Subject: [PATCH] gssub DSL function (#989) * gssub DSL function * build artifacts for gssub source-code change * unit-test files * gssub / Latin 1 example in docs * code-dedupe ssub and gssub --- docs/src/manpage.md | 35 ++++++++------- docs/src/manpage.txt | 35 ++++++++------- docs/src/reference-dsl-builtin-functions.md | 10 ++++- docs/src/special-symbols-and-formatting.md | 21 ++++++++- docs/src/special-symbols-and-formatting.md.in | 18 +++++++- internal/pkg/bifs/regex.go | 23 ++++++++-- .../pkg/dsl/cst/builtin_function_manager.go | 10 +++++ man/manpage.txt | 35 ++++++++------- man/mlr.1 | 43 ++++++++++++------- test/cases/dsl-subs/0004/cmd | 1 + test/cases/dsl-subs/0004/experr | 0 test/cases/dsl-subs/0004/expout | 1 + test/cases/dsl-subs/0004/input | 1 + test/cases/dsl-subs/0004/mlr | 1 + 14 files changed, 167 insertions(+), 67 deletions(-) create mode 100644 test/cases/dsl-subs/0004/cmd create mode 100644 test/cases/dsl-subs/0004/experr create mode 100644 test/cases/dsl-subs/0004/expout create mode 100644 test/cases/dsl-subs/0004/input create mode 100644 test/cases/dsl-subs/0004/mlr diff --git a/docs/src/manpage.md b/docs/src/manpage.md index 0efe44db2b..93f884dd94 100644 --- a/docs/src/manpage.md +++ b/docs/src/manpage.md @@ -208,20 +208,20 @@ FUNCTION LIST capitalize cbrt ceil clean_whitespace collapse_whitespace concat cos cosh depth dhms2fsec dhms2sec erf erfc every exp expm1 flatten float floor fmtifnum fmtnum fold format fsec2dhms fsec2hms get_keys get_values gmt2localtime - gmt2sec gsub haskey hexfmt hms2fsec hms2sec hostname int invqnorm is_absent - is_array is_bool is_boolean is_empty is_empty_map is_error is_float is_int - is_map is_nan is_nonempty_map is_not_array is_not_empty is_not_map is_not_null - is_null is_numeric is_present is_string joink joinkv joinv json_parse - json_stringify leafcount length localtime2gmt 
localtime2sec log log10 log1p - logifit lstrip madd mapdiff mapexcept mapselect mapsum max md5 mexp min mmul - msub os pow qnorm reduce regextract regextract_or_else round roundm rstrip - sec2dhms sec2gmt sec2gmtdate sec2hms sec2localdate sec2localtime select sgn - sha1 sha256 sha512 sin sinh sort splita splitax splitkv splitkvx splitnv - splitnvx sqrt ssub strftime strftime_local string strip strlen strptime - strptime_local sub substr substr0 substr1 system systime systimeint tan tanh - tolower toupper truncate typeof unflatten unformat unformatx uptime urand - urand32 urandelement urandint urandrange version ! != !=~ % & && * ** + - . .* - .+ .- ./ / // < << <= <=> == =~ > >= >> >>> ?: ?? ??? ^ ^^ | || ~ + gmt2sec gssub gsub haskey hexfmt hms2fsec hms2sec hostname int invqnorm + is_absent is_array is_bool is_boolean is_empty is_empty_map is_error is_float + is_int is_map is_nan is_nonempty_map is_not_array is_not_empty is_not_map + is_not_null is_null is_numeric is_present is_string joink joinkv joinv + json_parse json_stringify leafcount length localtime2gmt localtime2sec log + log10 log1p logifit lstrip madd mapdiff mapexcept mapselect mapsum max md5 + mexp min mmul msub os pow qnorm reduce regextract regextract_or_else round + roundm rstrip sec2dhms sec2gmt sec2gmtdate sec2hms sec2localdate sec2localtime + select sgn sha1 sha256 sha512 sin sinh sort splita splitax splitkv splitkvx + splitnv splitnvx sqrt ssub strftime strftime_local string strip strlen + strptime strptime_local sub substr substr0 substr1 system systime systimeint + tan tanh tolower toupper truncate typeof unflatten unformat unformatx uptime + urand urand32 urandelement urandint urandrange version ! != !=~ % & && * ** + + - . .* .+ .- ./ / // < << <= <=> == =~ > >= >> >>> ?: ?? ??? 
^ ^^ | || ~ COMMENTS-IN-DATA FLAGS Miller lets you put comments in your data, such as @@ -2244,6 +2244,11 @@ FUNCTIONS FOR FILTER/PUT Example: gmt2sec("2001-02-03T04:05:06Z") = 981173106 + gssub + (class=string #args=3) Like gsub but does no regexing. No characters are special. + Example: + gssub("ab.d.fg", ".", "X") gives "abXdXfg" + gsub (class=string #args=3) '$name = gsub($name, "old", "new")': replace all, with support for regular expressions. Capture groups \1 through \9 in the new part are matched from (...) in the old part, and must be used within the same call to gsub -- they don't persist for subsequent DSL statements. See also =~ and regextract. See also "Regular expressions" at https://miller.readthedocs.io. Examples: @@ -3190,5 +3195,5 @@ SEE ALSO - 2022-03-15 MILLER(1) + 2022-03-19 MILLER(1) diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt index 51c5de00a4..8559fc53e2 100644 --- a/docs/src/manpage.txt +++ b/docs/src/manpage.txt @@ -187,20 +187,20 @@ FUNCTION LIST capitalize cbrt ceil clean_whitespace collapse_whitespace concat cos cosh depth dhms2fsec dhms2sec erf erfc every exp expm1 flatten float floor fmtifnum fmtnum fold format fsec2dhms fsec2hms get_keys get_values gmt2localtime - gmt2sec gsub haskey hexfmt hms2fsec hms2sec hostname int invqnorm is_absent - is_array is_bool is_boolean is_empty is_empty_map is_error is_float is_int - is_map is_nan is_nonempty_map is_not_array is_not_empty is_not_map is_not_null - is_null is_numeric is_present is_string joink joinkv joinv json_parse - json_stringify leafcount length localtime2gmt localtime2sec log log10 log1p - logifit lstrip madd mapdiff mapexcept mapselect mapsum max md5 mexp min mmul - msub os pow qnorm reduce regextract regextract_or_else round roundm rstrip - sec2dhms sec2gmt sec2gmtdate sec2hms sec2localdate sec2localtime select sgn - sha1 sha256 sha512 sin sinh sort splita splitax splitkv splitkvx splitnv - splitnvx sqrt ssub strftime strftime_local string strip strlen strptime - 
strptime_local sub substr substr0 substr1 system systime systimeint tan tanh - tolower toupper truncate typeof unflatten unformat unformatx uptime urand - urand32 urandelement urandint urandrange version ! != !=~ % & && * ** + - . .* - .+ .- ./ / // < << <= <=> == =~ > >= >> >>> ?: ?? ??? ^ ^^ | || ~ + gmt2sec gssub gsub haskey hexfmt hms2fsec hms2sec hostname int invqnorm + is_absent is_array is_bool is_boolean is_empty is_empty_map is_error is_float + is_int is_map is_nan is_nonempty_map is_not_array is_not_empty is_not_map + is_not_null is_null is_numeric is_present is_string joink joinkv joinv + json_parse json_stringify leafcount length localtime2gmt localtime2sec log + log10 log1p logifit lstrip madd mapdiff mapexcept mapselect mapsum max md5 + mexp min mmul msub os pow qnorm reduce regextract regextract_or_else round + roundm rstrip sec2dhms sec2gmt sec2gmtdate sec2hms sec2localdate sec2localtime + select sgn sha1 sha256 sha512 sin sinh sort splita splitax splitkv splitkvx + splitnv splitnvx sqrt ssub strftime strftime_local string strip strlen + strptime strptime_local sub substr substr0 substr1 system systime systimeint + tan tanh tolower toupper truncate typeof unflatten unformat unformatx uptime + urand urand32 urandelement urandint urandrange version ! != !=~ % & && * ** + + - . .* .+ .- ./ / // < << <= <=> == =~ > >= >> >>> ?: ?? ??? ^ ^^ | || ~ COMMENTS-IN-DATA FLAGS Miller lets you put comments in your data, such as @@ -2223,6 +2223,11 @@ FUNCTIONS FOR FILTER/PUT Example: gmt2sec("2001-02-03T04:05:06Z") = 981173106 + gssub + (class=string #args=3) Like gsub but does no regexing. No characters are special. + Example: + gssub("ab.d.fg", ".", "X") gives "abXdXfg" + gsub (class=string #args=3) '$name = gsub($name, "old", "new")': replace all, with support for regular expressions. Capture groups \1 through \9 in the new part are matched from (...) 
in the old part, and must be used within the same call to gsub -- they don't persist for subsequent DSL statements. See also =~ and regextract. See also "Regular expressions" at https://miller.readthedocs.io. Examples: @@ -3169,4 +3174,4 @@ SEE ALSO - 2022-03-15 MILLER(1) + 2022-03-19 MILLER(1) diff --git a/docs/src/reference-dsl-builtin-functions.md b/docs/src/reference-dsl-builtin-functions.md index ecf3211b74..6e7845cc6e 100644 --- a/docs/src/reference-dsl-builtin-functions.md +++ b/docs/src/reference-dsl-builtin-functions.md @@ -74,7 +74,7 @@ is 2. Unary operators such as `!` and `~` show argument-count of 1; the ternary * [**Hashing functions**](#hashing-functions): [md5](#md5), [sha1](#sha1), [sha256](#sha256), [sha512](#sha512). * [**Higher-order-functions functions**](#higher-order-functions-functions): [any](#any), [apply](#apply), [every](#every), [fold](#fold), [reduce](#reduce), [select](#select), [sort](#sort). * [**Math functions**](#math-functions): [abs](#abs), [acos](#acos), [acosh](#acosh), [asin](#asin), [asinh](#asinh), [atan](#atan), [atan2](#atan2), [atanh](#atanh), [cbrt](#cbrt), [ceil](#ceil), [cos](#cos), [cosh](#cosh), [erf](#erf), [erfc](#erfc), [exp](#exp), [expm1](#expm1), [floor](#floor), [invqnorm](#invqnorm), [log](#log), [log10](#log10), [log1p](#log1p), [logifit](#logifit), [max](#max), [min](#min), [qnorm](#qnorm), [round](#round), [roundm](#roundm), [sgn](#sgn), [sin](#sin), [sinh](#sinh), [sqrt](#sqrt), [tan](#tan), [tanh](#tanh), [urand](#urand), [urand32](#urand32), [urandelement](#urandelement), [urandint](#urandint), [urandrange](#urandrange). 
-* [**String functions**](#string-functions): [capitalize](#capitalize), [clean_whitespace](#clean_whitespace), [collapse_whitespace](#collapse_whitespace), [format](#format), [gsub](#gsub), [lstrip](#lstrip), [regextract](#regextract), [regextract_or_else](#regextract_or_else), [rstrip](#rstrip), [ssub](#ssub), [strip](#strip), [strlen](#strlen), [sub](#sub), [substr](#substr), [substr0](#substr0), [substr1](#substr1), [tolower](#tolower), [toupper](#toupper), [truncate](#truncate), [unformat](#unformat), [unformatx](#unformatx), [\.](#dot). +* [**String functions**](#string-functions): [capitalize](#capitalize), [clean_whitespace](#clean_whitespace), [collapse_whitespace](#collapse_whitespace), [format](#format), [gssub](#gssub), [gsub](#gsub), [lstrip](#lstrip), [regextract](#regextract), [regextract_or_else](#regextract_or_else), [rstrip](#rstrip), [ssub](#ssub), [strip](#strip), [strlen](#strlen), [sub](#sub), [substr](#substr), [substr0](#substr0), [substr1](#substr1), [tolower](#tolower), [toupper](#toupper), [truncate](#truncate), [unformat](#unformat), [unformatx](#unformatx), [\.](#dot). * [**System functions**](#system-functions): [hostname](#hostname), [os](#os), [system](#system), [version](#version). * [**Time functions**](#time-functions): [dhms2fsec](#dhms2fsec), [dhms2sec](#dhms2sec), [fsec2dhms](#fsec2dhms), [fsec2hms](#fsec2hms), [gmt2localtime](#gmt2localtime), [gmt2sec](#gmt2sec), [hms2fsec](#hms2fsec), [hms2sec](#hms2sec), [localtime2gmt](#localtime2gmt), [localtime2sec](#localtime2sec), [sec2dhms](#sec2dhms), [sec2gmt](#sec2gmt), [sec2gmtdate](#sec2gmtdate), [sec2hms](#sec2hms), [sec2localdate](#sec2localdate), [sec2localtime](#sec2localtime), [strftime](#strftime), [strftime_local](#strftime_local), [strptime](#strptime), [strptime_local](#strptime_local), [systime](#systime), [systimeint](#systimeint), [uptime](#uptime). 
* [**Typing functions**](#typing-functions): [asserting_absent](#asserting_absent), [asserting_array](#asserting_array), [asserting_bool](#asserting_bool), [asserting_boolean](#asserting_boolean), [asserting_empty](#asserting_empty), [asserting_empty_map](#asserting_empty_map), [asserting_error](#asserting_error), [asserting_float](#asserting_float), [asserting_int](#asserting_int), [asserting_map](#asserting_map), [asserting_nonempty_map](#asserting_nonempty_map), [asserting_not_array](#asserting_not_array), [asserting_not_empty](#asserting_not_empty), [asserting_not_map](#asserting_not_map), [asserting_not_null](#asserting_not_null), [asserting_null](#asserting_null), [asserting_numeric](#asserting_numeric), [asserting_present](#asserting_present), [asserting_string](#asserting_string), [is_absent](#is_absent), [is_array](#is_array), [is_bool](#is_bool), [is_boolean](#is_boolean), [is_empty](#is_empty), [is_empty_map](#is_empty_map), [is_error](#is_error), [is_float](#is_float), [is_int](#is_int), [is_map](#is_map), [is_nan](#is_nan), [is_nonempty_map](#is_nonempty_map), [is_not_array](#is_not_array), [is_not_empty](#is_not_empty), [is_not_map](#is_not_map), [is_not_null](#is_not_null), [is_null](#is_null), [is_numeric](#is_numeric), [is_present](#is_present), [is_string](#is_string), [typeof](#typeof). @@ -992,6 +992,14 @@ format("{}:{}:{}", 1,2,3,4) gives "1:2:3". +### gssub +
+gssub  (class=string #args=3) Like gsub but does no regexing. No characters are special.
+Example:
+gssub("ab.d.fg", ".", "X") gives "abXdXfg"
+
+ + ### gsub
 gsub  (class=string #args=3) '$name = gsub($name, "old", "new")': replace all, with support for regular expressions. Capture groups \1 through \9 in the new part are matched from (...) in the old part, and must be used within the same call to gsub -- they don't persist for subsequent DSL statements. See also =~ and regextract. See also "Regular expressions" at https://miller.readthedocs.io.
diff --git a/docs/src/special-symbols-and-formatting.md b/docs/src/special-symbols-and-formatting.md
index 732e5e54dd..e1d39b7a64 100644
--- a/docs/src/special-symbols-and-formatting.md
+++ b/docs/src/special-symbols-and-formatting.md
@@ -165,7 +165,26 @@ b it is!
 c is it ...
 
-The [`ssub`](reference-dsl-builtin-functions.md#ssub) function exists precisely for this reason: so you don't have to escape anything. +The +[`ssub`](reference-dsl-builtin-functions.md#ssub) and +[`gssub`](reference-dsl-builtin-functions.md#gssub) +functions exist precisely for this reason: so you don't have to escape anything. + +The `ssub` and `gssub` functions are also handy for dealing with non-UTF-8 strings such as Latin 1, since Go's +`regexp` library -- which Miller uses -- requires UTF-8 strings. For example: + +
+mlr -n put 'end {
+  name = "Ka\xf0l\xedn og \xdeormundr";
+  name = gssub(name, "\xde", "\u00de");
+  name = gssub(name, "\xf0", "\u00f0");
+  name = gssub(name, "\xed", "\u00ed");
+  print name;
+}'
+
+
+Kaðlín og Þormundr
+
## How to apply math to regex output? diff --git a/docs/src/special-symbols-and-formatting.md.in b/docs/src/special-symbols-and-formatting.md.in index c0aeca606d..f7e249c549 100644 --- a/docs/src/special-symbols-and-formatting.md.in +++ b/docs/src/special-symbols-and-formatting.md.in @@ -89,7 +89,23 @@ GENMD-RUN-COMMAND mlr --oxtab put '$c = ssub($a, "?"," ...")' data/question.dat GENMD-EOF -The [`ssub`](reference-dsl-builtin-functions.md#ssub) function exists precisely for this reason: so you don't have to escape anything. +The +[`ssub`](reference-dsl-builtin-functions.md#ssub) and +[`gssub`](reference-dsl-builtin-functions.md#gssub) +functions exist precisely for this reason: so you don't have to escape anything. + +The `ssub` and `gssub` functions are also handy for dealing with non-UTF-8 strings such as Latin 1, since Go's +`regexp` library -- which Miller uses -- requires UTF-8 strings. For example: + +GENMD-RUN-COMMAND +mlr -n put 'end { + name = "Ka\xf0l\xedn og \xdeormundr"; + name = gssub(name, "\xde", "\u00de"); + name = gssub(name, "\xf0", "\u00f0"); + name = gssub(name, "\xed", "\u00ed"); + print name; +}' +GENMD-EOF ## How to apply math to regex output? diff --git a/internal/pkg/bifs/regex.go b/internal/pkg/bifs/regex.go index 448a622786..41e816f168 100644 --- a/internal/pkg/bifs/regex.go +++ b/internal/pkg/bifs/regex.go @@ -10,6 +10,17 @@ import ( // BIF_ssub implements the ssub function -- no-frills string-replace, no // regexes, no escape sequences. func BIF_ssub(input1, input2, input3 *mlrval.Mlrval) *mlrval.Mlrval { + return bif_ssub_gssub(input1, input2, input3, false) +} + +// BIF_gssub implements the gssub function -- like ssub, but replaces all +// occurrences rather than only the first. No regexes, no escape sequences. +func BIF_gssub(input1, input2, input3 *mlrval.Mlrval) *mlrval.Mlrval { + return bif_ssub_gssub(input1, input2, input3, true) +} + +// bif_ssub_gssub is shared code for BIF_ssub (doAll=false: first occurrence only) and BIF_gssub (doAll=true: all occurrences). 
+func bif_ssub_gssub(input1, input2, input3 *mlrval.Mlrval, doAll bool) *mlrval.Mlrval { if input1.IsErrorOrAbsent() { return input1 } @@ -28,9 +39,15 @@ func BIF_ssub(input1, input2, input3 *mlrval.Mlrval) *mlrval.Mlrval { if !input3.IsStringOrVoid() { return mlrval.ERROR } - return mlrval.FromString( - strings.Replace(input1.AcquireStringValue(), input2.AcquireStringValue(), input3.AcquireStringValue(), 1), - ) + if doAll { + return mlrval.FromString( + strings.ReplaceAll(input1.AcquireStringValue(), input2.AcquireStringValue(), input3.AcquireStringValue()), + ) + } else { + return mlrval.FromString( + strings.Replace(input1.AcquireStringValue(), input2.AcquireStringValue(), input3.AcquireStringValue(), 1), + ) + } } // BIF_sub implements the sub function, with support for regexes and regex captures diff --git a/internal/pkg/dsl/cst/builtin_function_manager.go b/internal/pkg/dsl/cst/builtin_function_manager.go index 80322d7527..07d065f100 100644 --- a/internal/pkg/dsl/cst/builtin_function_manager.go +++ b/internal/pkg/dsl/cst/builtin_function_manager.go @@ -472,6 +472,16 @@ argument) doesn't match the pattern (second argument).`, }, }, + { + name: "gssub", + class: FUNC_CLASS_STRING, + help: `Like gsub but does no regexing. 
No characters are special.`, + ternaryFunc: bifs.BIF_gssub, + examples: []string{ + `gssub("ab.d.fg", ".", "X") gives "abXdXfg"`, + }, + }, + { name: "sub", class: FUNC_CLASS_STRING, diff --git a/man/manpage.txt b/man/manpage.txt index 51c5de00a4..8559fc53e2 100644 --- a/man/manpage.txt +++ b/man/manpage.txt @@ -187,20 +187,20 @@ FUNCTION LIST capitalize cbrt ceil clean_whitespace collapse_whitespace concat cos cosh depth dhms2fsec dhms2sec erf erfc every exp expm1 flatten float floor fmtifnum fmtnum fold format fsec2dhms fsec2hms get_keys get_values gmt2localtime - gmt2sec gsub haskey hexfmt hms2fsec hms2sec hostname int invqnorm is_absent - is_array is_bool is_boolean is_empty is_empty_map is_error is_float is_int - is_map is_nan is_nonempty_map is_not_array is_not_empty is_not_map is_not_null - is_null is_numeric is_present is_string joink joinkv joinv json_parse - json_stringify leafcount length localtime2gmt localtime2sec log log10 log1p - logifit lstrip madd mapdiff mapexcept mapselect mapsum max md5 mexp min mmul - msub os pow qnorm reduce regextract regextract_or_else round roundm rstrip - sec2dhms sec2gmt sec2gmtdate sec2hms sec2localdate sec2localtime select sgn - sha1 sha256 sha512 sin sinh sort splita splitax splitkv splitkvx splitnv - splitnvx sqrt ssub strftime strftime_local string strip strlen strptime - strptime_local sub substr substr0 substr1 system systime systimeint tan tanh - tolower toupper truncate typeof unflatten unformat unformatx uptime urand - urand32 urandelement urandint urandrange version ! != !=~ % & && * ** + - . .* - .+ .- ./ / // < << <= <=> == =~ > >= >> >>> ?: ?? ??? 
^ ^^ | || ~ + gmt2sec gssub gsub haskey hexfmt hms2fsec hms2sec hostname int invqnorm + is_absent is_array is_bool is_boolean is_empty is_empty_map is_error is_float + is_int is_map is_nan is_nonempty_map is_not_array is_not_empty is_not_map + is_not_null is_null is_numeric is_present is_string joink joinkv joinv + json_parse json_stringify leafcount length localtime2gmt localtime2sec log + log10 log1p logifit lstrip madd mapdiff mapexcept mapselect mapsum max md5 + mexp min mmul msub os pow qnorm reduce regextract regextract_or_else round + roundm rstrip sec2dhms sec2gmt sec2gmtdate sec2hms sec2localdate sec2localtime + select sgn sha1 sha256 sha512 sin sinh sort splita splitax splitkv splitkvx + splitnv splitnvx sqrt ssub strftime strftime_local string strip strlen + strptime strptime_local sub substr substr0 substr1 system systime systimeint + tan tanh tolower toupper truncate typeof unflatten unformat unformatx uptime + urand urand32 urandelement urandint urandrange version ! != !=~ % & && * ** + + - . .* .+ .- ./ / // < << <= <=> == =~ > >= >> >>> ?: ?? ??? ^ ^^ | || ~ COMMENTS-IN-DATA FLAGS Miller lets you put comments in your data, such as @@ -2223,6 +2223,11 @@ FUNCTIONS FOR FILTER/PUT Example: gmt2sec("2001-02-03T04:05:06Z") = 981173106 + gssub + (class=string #args=3) Like gsub but does no regexing. No characters are special. + Example: + gssub("ab.d.fg", ".", "X") gives "abXdXfg" + gsub (class=string #args=3) '$name = gsub($name, "old", "new")': replace all, with support for regular expressions. Capture groups \1 through \9 in the new part are matched from (...) in the old part, and must be used within the same call to gsub -- they don't persist for subsequent DSL statements. See also =~ and regextract. See also "Regular expressions" at https://miller.readthedocs.io. 
Examples: @@ -3169,4 +3174,4 @@ SEE ALSO - 2022-03-15 MILLER(1) + 2022-03-19 MILLER(1) diff --git a/man/mlr.1 b/man/mlr.1 index dfaddd1be8..0d309b28cb 100644 --- a/man/mlr.1 +++ b/man/mlr.1 @@ -2,12 +2,12 @@ .\" Title: mlr .\" Author: [see the "AUTHOR" section] .\" Generator: ./mkman.rb -.\" Date: 2022-03-15 +.\" Date: 2022-03-19 .\" Manual: \ \& .\" Source: \ \& .\" Language: English .\" -.TH "MILLER" "1" "2022-03-15" "\ \&" "\ \&" +.TH "MILLER" "1" "2022-03-19" "\ \&" "\ \&" .\" ----------------------------------------------------------------- .\" * Portability definitions .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -234,20 +234,20 @@ asserting_present asserting_string atan atan2 atanh bitcount boolean capitalize cbrt ceil clean_whitespace collapse_whitespace concat cos cosh depth dhms2fsec dhms2sec erf erfc every exp expm1 flatten float floor fmtifnum fmtnum fold format fsec2dhms fsec2hms get_keys get_values gmt2localtime -gmt2sec gsub haskey hexfmt hms2fsec hms2sec hostname int invqnorm is_absent -is_array is_bool is_boolean is_empty is_empty_map is_error is_float is_int -is_map is_nan is_nonempty_map is_not_array is_not_empty is_not_map is_not_null -is_null is_numeric is_present is_string joink joinkv joinv json_parse -json_stringify leafcount length localtime2gmt localtime2sec log log10 log1p -logifit lstrip madd mapdiff mapexcept mapselect mapsum max md5 mexp min mmul -msub os pow qnorm reduce regextract regextract_or_else round roundm rstrip -sec2dhms sec2gmt sec2gmtdate sec2hms sec2localdate sec2localtime select sgn -sha1 sha256 sha512 sin sinh sort splita splitax splitkv splitkvx splitnv -splitnvx sqrt ssub strftime strftime_local string strip strlen strptime -strptime_local sub substr substr0 substr1 system systime systimeint tan tanh -tolower toupper truncate typeof unflatten unformat unformatx uptime urand -urand32 urandelement urandint urandrange version ! != !=~ % & && * ** + - . 
.* -\&.+ .- ./ / // < << <= <=> == =~ > >= >> >>> ?: ?? ??? ^ ^^ | || ~ +gmt2sec gssub gsub haskey hexfmt hms2fsec hms2sec hostname int invqnorm +is_absent is_array is_bool is_boolean is_empty is_empty_map is_error is_float +is_int is_map is_nan is_nonempty_map is_not_array is_not_empty is_not_map +is_not_null is_null is_numeric is_present is_string joink joinkv joinv +json_parse json_stringify leafcount length localtime2gmt localtime2sec log +log10 log1p logifit lstrip madd mapdiff mapexcept mapselect mapsum max md5 +mexp min mmul msub os pow qnorm reduce regextract regextract_or_else round +roundm rstrip sec2dhms sec2gmt sec2gmtdate sec2hms sec2localdate sec2localtime +select sgn sha1 sha256 sha512 sin sinh sort splita splitax splitkv splitkvx +splitnv splitnvx sqrt ssub strftime strftime_local string strip strlen +strptime strptime_local sub substr substr0 substr1 system systime systimeint +tan tanh tolower toupper truncate typeof unflatten unformat unformatx uptime +urand urand32 urandelement urandint urandrange version ! != !=~ % & && * ** + +- . .* .+ .- ./ / // < << <= <=> == =~ > >= >> >>> ?: ?? ??? ^ ^^ | || ~ .fi .if n \{\ .RE @@ -3122,6 +3122,17 @@ gmt2sec("2001-02-03T04:05:06Z") = 981173106 .fi .if n \{\ .RE +.SS "gssub" +.if n \{\ +.RS 0 +.\} +.nf + (class=string #args=3) Like gsub but does no regexing. No characters are special. 
+Example: +gssub("ab.d.fg", ".", "X") gives "abXdXfg" +.fi +.if n \{\ +.RE .SS "gsub" .if n \{\ .RS 0 diff --git a/test/cases/dsl-subs/0004/cmd b/test/cases/dsl-subs/0004/cmd new file mode 100644 index 0000000000..a0c7f557d9 --- /dev/null +++ b/test/cases/dsl-subs/0004/cmd @@ -0,0 +1 @@ +mlr put -f ${CASEDIR}/mlr ./${CASEDIR}/input diff --git a/test/cases/dsl-subs/0004/experr b/test/cases/dsl-subs/0004/experr new file mode 100644 index 0000000000..e69de29bb2 diff --git a/test/cases/dsl-subs/0004/expout b/test/cases/dsl-subs/0004/expout new file mode 100644 index 0000000000..190525704b --- /dev/null +++ b/test/cases/dsl-subs/0004/expout @@ -0,0 +1 @@ +x=eeee,y=XXXX diff --git a/test/cases/dsl-subs/0004/input b/test/cases/dsl-subs/0004/input new file mode 100644 index 0000000000..73c7ff2897 --- /dev/null +++ b/test/cases/dsl-subs/0004/input @@ -0,0 +1 @@ +x=eeee diff --git a/test/cases/dsl-subs/0004/mlr b/test/cases/dsl-subs/0004/mlr new file mode 100644 index 0000000000..292b0767a6 --- /dev/null +++ b/test/cases/dsl-subs/0004/mlr @@ -0,0 +1 @@ +$y=gssub($x, "e", "X")