diff --git a/README.md b/README.md
index 72dd9bfabe..0f1cb04c9d 100644
--- a/README.md
+++ b/README.md
@@ -34,8 +34,8 @@ indices. Examples:
$mean = $sum / $count # no assignment if count unset
'
% mlr --from infile.dat put -f analyze.mlr
-% mlr --from infile.dat put 'tee > "./taps/data-".$a."-".$b, $*'
-% mlr --from infile.dat put 'tee | "gzip > ./taps/data-".$a."-".$b.".gz", $*'
+% mlr --from infile.dat put 'tee > "./taps/data-".$a."-".$b, $*'
+% mlr --from infile.dat put 'tee | "gzip > ./taps/data-".$a."-".$b.".gz", $*'
% mlr --from infile.dat put -q '@v=$*; dump | "jq .[]"'
% mlr --from infile.dat put '(NR % 1000 == 0) { print > stderr, "Checkpoint ".NR}'
```
diff --git a/c/cli/mlrcli.c b/c/cli/mlrcli.c
index ce04dc4b5d..bb7f5d5eec 100644
--- a/c/cli/mlrcli.c
+++ b/c/cli/mlrcli.c
@@ -248,8 +248,8 @@ static void main_usage_examples(FILE* o, char* argv0, char* leader) {
fprintf(o, " }\n");
fprintf(o, " $mean = $sum / $count # no assignment if count unset'\n");
fprintf(o, "%s%s --from infile.dat put -f analyze.mlr\n", leader, argv0);
- fprintf(o, "%s%s --from infile.dat put 'tee > \"./taps/data-\".$a.\"-\".$b, $*'\n", leader, argv0);
- fprintf(o, "%s%s --from infile.dat put 'tee | \"gzip > ./taps/data-\".$a.\"-\".$b.\".gz\", $*'\n", leader, argv0);
+ fprintf(o, "%s%s --from infile.dat put 'tee > \"./taps/data-\".$a.\"-\".$b, $*'\n", leader, argv0);
+ fprintf(o, "%s%s --from infile.dat put 'tee | \"gzip > ./taps/data-\".$a.\"-\".$b.\".gz\", $*'\n", leader, argv0);
fprintf(o, "%s%s --from infile.dat put -q '@v=$*; dump | \"jq .[]\"'\n", leader, argv0);
fprintf(o, "%s%s --from infile.dat put '(NR %% 1000 == 0) { print > stderr, \"Checkpoint \".NR}'\n",
leader, argv0);
diff --git a/c/mapping/mlr_dsl_cst.c b/c/mapping/mlr_dsl_cst.c
index dfe63cc8e8..45a0eb77a4 100644
--- a/c/mapping/mlr_dsl_cst.c
+++ b/c/mapping/mlr_dsl_cst.c
@@ -2670,17 +2670,20 @@ static void mlr_dsl_tee_keyword_usage(FILE* ostream) {
"tee: prints the current record to specified file.\n"
" This is an immediate print to the specified file (except for pprint format\n"
" which of course waits until the end of the input stream to format all output).\n"
+ "\n"
" The > and >> are for write and append, as in the shell, but (as with awk) the\n"
- " file-overwrite for > is on first write, not per record. The | is for pipe to a\n"
- " process which will process the data. There will be one subordinate process for\n"
- " each distinct value of the piped-to command. Output-formatting flags are taken\n"
- " from the main command line.\n"
+ " file-overwrite for > is on first write, not per record. The | is for piping to\n"
+ " a process which will process the data. There will be one open file for each\n"
+ " distinct file name (for > and >>) or one subordinate process for each distinct\n"
+ " value of the piped-to command (for |). Output-formatting flags are taken from\n"
+ " the main command line.\n"
"\n"
" Example: mlr --from f.dat put 'tee > \"/tmp/data-\".$a, $*'\n"
" Example: mlr --from f.dat put 'tee >> \"/tmp/data-\".$a.$b, $*'\n"
" Example: mlr --from f.dat put 'tee > stderr, $*'\n"
" Example: mlr --from f.dat put -q 'tee | \"tr \[a-z\\] \[A-Z\\]\", $*'\n"
- " Example: mlr --from f.dat put -q 'tee | \"tr \[a-z\\] \[A-Z\\] > /tmp/data-\".$a, $*'\n");
+ " Example: mlr --from f.dat put -q 'tee | \"tr \[a-z\\] \[A-Z\\] > /tmp/data-\".$a, $*'\n"
+ " Example: mlr --from f.dat put -q 'tee | \"gzip > /tmp/data-\".$a.\".gz\", $*'\n");
}
static void mlr_dsl_emit_keyword_usage(FILE* ostream) {
@@ -2689,17 +2692,21 @@ static void mlr_dsl_emit_keyword_usage(FILE* ostream) {
" indices present in the data but not slotted by emit arguments are not output.\n"
"\n"
" With >, >>, or |, the data do not become part of the output record stream but\n"
- " are instead redirected. The > and >> are for write and append, as in the\n"
- " shell, but (as with awk) the file-overwrite for > is on first write, not per\n"
- " record. The | is for pipe to a process which will process the data. There will\n"
- " be one subordinate process for each distinct value of the piped-to command.\n"
- " Output-formatting flags are taken from the main command line.\n"
+ " are instead redirected.\n"
+ "\n"
+ " The > and >> are for write and append, as in the shell, but (as with awk) the\n"
+ " file-overwrite for > is on first write, not per record. The | is for piping to\n"
+ " a process which will process the data. There will be one open file for each\n"
+ " distinct file name (for > and >>) or one subordinate process for each distinct\n"
+ " value of the piped-to command (for |). Output-formatting flags are taken from\n"
+ " the main command line.\n"
"\n"
" Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emit @sums'\n"
" Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emit @sums, \"index1\", \"index2\"'\n"
" Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emit @*, \"index1\", \"index2\"'\n"
" Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emit > \"mytap.dat\", @*, \"index1\", \"index2\"'\n"
" Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emit >> \"mytap.dat\", @*, \"index1\", \"index2\"'\n"
+ " Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emit | \"gzip > mytap.dat.gz\", @*, \"index1\", \"index2\"'\n"
" Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emit > stderr, @*, \"index1\", \"index2\"'\n"
" Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emit | \"grep somepattern\", @*, \"index1\", \"index2\"'\n"
"\n"
@@ -2713,17 +2720,21 @@ static void mlr_dsl_emitp_keyword_usage(FILE* ostream) {
" output concatenated with \":\".\n"
"\n"
" With >, >>, or |, the data do not become part of the output record stream but\n"
- " are instead redirected. The > and >> are for write and append, as in the\n"
- " shell, but (as with awk) the file-overwrite for > is on first write, not per\n"
- " record. The | is for pipe to a process which will process the data. There will\n"
- " be one subordinate process for each distinct value of the piped-to command.\n"
- " Output-formatting flags are taken from the main command line.\n"
+ " are instead redirected.\n"
+ "\n"
+ " The > and >> are for write and append, as in the shell, but (as with awk) the\n"
+ " file-overwrite for > is on first write, not per record. The | is for piping to\n"
+ " a process which will process the data. There will be one open file for each\n"
+ " distinct file name (for > and >>) or one subordinate process for each distinct\n"
+ " value of the piped-to command (for |). Output-formatting flags are taken from\n"
+ " the main command line.\n"
"\n"
" Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emitp @sums'\n"
" Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emitp @sums, \"index1\", \"index2\"'\n"
" Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emitp @*, \"index1\", \"index2\"'\n"
" Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emitp > \"mytap.dat\", @*, \"index1\", \"index2\"'\n"
" Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emitp >> \"mytap.dat\", @*, \"index1\", \"index2\"'\n"
+ " Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emitp | \"gzip > mytap.dat.gz\", @*, \"index1\", \"index2\"'\n"
" Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emitp > stderr, @*, \"index1\", \"index2\"'\n"
" Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emitp | \"grep somepattern\", @*, \"index1\", \"index2\"'\n"
"\n"
@@ -2736,11 +2747,14 @@ static void mlr_dsl_emitf_keyword_usage(FILE* ostream) {
" output record stream.\n"
"\n"
" With >, >>, or |, the data do not become part of the output record stream but\n"
- " are instead redirected. The > and >> are for write and append, as in the\n"
- " shell, but (as with awk) the file-overwrite for > is on first write, not per\n"
- " record. The | is for pipe to a process which will process the data. There will\n"
- " be one subordinate process for each distinct value of the piped-to command.\n"
- " Output-formatting flags are taken from the main command line.\n"
+ " are instead redirected.\n"
+ "\n"
+ " The > and >> are for write and append, as in the shell, but (as with awk) the\n"
+ " file-overwrite for > is on first write, not per record. The | is for piping to\n"
+ " a process which will process the data. There will be one open file for each\n"
+ " distinct file name (for > and >>) or one subordinate process for each distinct\n"
+ " value of the piped-to command (for |). Output-formatting flags are taken from\n"
+ " the main command line.\n"
"\n"
" Example: mlr --from f.dat put '@a=$i;@b+=$x;@c+=$y; emitf @a'\n"
" Example: mlr --from f.dat put '@a=$i;@b+=$x;@c+=$y; emitf @a, @b, @c'\n"
@@ -2759,10 +2773,14 @@ static void mlr_dsl_dump_keyword_usage(FILE* ostream) {
" to stdout as JSON.\n"
"\n"
" With >, >>, or |, the data do not become part of the output record stream but\n"
- " are instead redirected. The > and >> are for write and append, as in the\n"
- " shell, but (as with awk) the file-overwrite for > is on first write, not per\n"
- " record. The | is for pipe to a process which will process the data. There will\n"
- " be one subordinate process for each distinct value of the piped-to command.\n"
+ " are instead redirected.\n"
+ "\n"
+ " The > and >> are for write and append, as in the shell, but (as with awk) the\n"
+ " file-overwrite for > is on first write, not per record. The | is for piping to\n"
+ " a process which will process the data. There will be one open file for each\n"
+ " distinct file name (for > and >>) or one subordinate process for each distinct\n"
+ " value of the piped-to command (for |). Output-formatting flags are taken from\n"
+ " the main command line.\n"
"\n"
" Example: mlr --from f.dat put -q '@v[NR]=$*; end { dump }'\n"
" Example: mlr --from f.dat put -q '@v[NR]=$*; end { dump > \"mytap.dat\"}'\n"
diff --git a/c/output/multi_lrec_writer.c b/c/output/multi_lrec_writer.c
index ec551ed8f8..98a9a5add8 100644
--- a/c/output/multi_lrec_writer.c
+++ b/c/output/multi_lrec_writer.c
@@ -73,11 +73,13 @@ void multi_lrec_writer_output_srec(multi_lrec_writer_t* pmlw, lrec_t* poutrec, c
fflush(pstate->output_stream);
} else {
if (pstate->is_popen) {
- if (pclose(pstate->output_stream) != 0) {
- perror("pclose");
- fprintf(stderr, "%s: pclose error on \"%s\".\n", MLR_GLOBALS.bargv0, filename_or_command);
- exit(1);
- }
+ // Sadly, pclose returns an error even on well-formed commands. For example, if the popened
+ // command was "grep nonesuch" and the string "nonesuch" was not encountered, grep returns
+			// non-zero and pclose flags it as an error. We cannot differentiate these from genuine
+ // failure cases so the best choice is to simply call pclose and ignore error codes.
+ // If a piped-to command does fail then it should have some output to stderr which the
+ // user can take advantage of.
+ (void)pclose(pstate->output_stream);
} else {
if (fclose(pstate->output_stream) != 0) {
perror("fclose");
@@ -107,11 +109,13 @@ void multi_lrec_writer_drain(multi_lrec_writer_t* pmlw) {
pstate->plrec_writer->pprocess_func(pstate->plrec_writer->pvstate, pstate->output_stream, NULL);
fflush(pstate->output_stream);
if (pstate->is_popen) {
- if (pclose(pstate->output_stream) != 0) {
- perror("pclose");
- fprintf(stderr, "%s: pclose error on \"%s\".\n", MLR_GLOBALS.bargv0, pstate->filename_or_command);
- exit(1);
- }
+ // Sadly, pclose returns an error even on well-formed commands. For example, if the popened
+ // command was "grep nonesuch" and the string "nonesuch" was not encountered, grep returns
+			// non-zero and pclose flags it as an error. We cannot differentiate these from genuine
+ // failure cases so the best choice is to simply call pclose and ignore error codes.
+ // If a piped-to command does fail then it should have some output to stderr which the
+ // user can take advantage of.
+ (void)pclose(pstate->output_stream);
} else {
if (fclose(pstate->output_stream) != 0) {
perror("fclose");
diff --git a/c/todo.txt b/c/todo.txt
index e326e9e445..7c4a4f35f7 100644
--- a/c/todo.txt
+++ b/c/todo.txt
@@ -26,20 +26,6 @@ TOP OF LIST:
PRE-RELEASE 4.4.0:
* ignore subproc abend (e.g. grep nonesuch) + UT cases
-! mld for all redirected I/O
-* mld: kws not avail as boundvars & what happens if tried: "syntax error"
-
-cookbook:
-* mlr --from ../data/big.dkvp put -q 'tee > $a.$b.".txt", $*'
- wc -l ??????.txt
- 40005 ekseks.txt
- 40116 ekshat.txt
- 40105 ekspan.txt
- 40257 ekswye.txt
- ...
-* mlr step -a shift
-* ... then put -q '' or ... then nothing
-* asv et al.
----------------------------------------------------------------
FOR 4.4.0:
@@ -64,6 +50,7 @@ POST-4.4.0:
? --imd ?
? put/tee --oxxx flags overlays ?
+? mlr step -a shift --by {n}
----------------------------------------------------------------
! lemon refactor
diff --git a/doc/content-for-reference.html b/doc/content-for-reference.html
index bf961c1c59..d118a28cb3 100644
--- a/doc/content-for-reference.html
+++ b/doc/content-for-reference.html
@@ -904,33 +904,34 @@
Emit-all statements for put
POKI_RUN_COMMAND{{mlr --from data/small --opprint put -q '@sum[$a][$b] += $x; @count[$a][$b] += 1; end{emit @*,"a","b"}'}}HERE
POKI_RUN_COMMAND{{mlr --from data/small --opprint put -q '@sum[$a][$b] += $x; @count[$a][$b] += 1; end{emit (@sum, @count),"a","b"}'}}HERE
-Redirected output statements for put
+Redirected-output statements for put
The tee, emitf, emitp, emit, print, and
-dump keyword all allow you to redirect output to one or more files or
-pipe-to commands.
+dump keywords all allow you to redirect output to one or more files or
+pipe-to commands. The filenames/commands are strings which can be constructed
+using record-dependent values, so you can do things like splitting a table into
+multiple files, one for each account ID, and so on.
-
- Note the following:
+ Details:
mlr put sends the current record (possibly modified by the
put expression) to the output record stream. Records are then input to
the following verb in a then-chain (if any), else printed to standard
-output. The tee keyword additionally writes the output record to
-specified file(s) or pipe-to command, or immediately to
-stdout/stderr.
+output (unless put -q). The tee keyword additionally
+writes the output record to specified file(s) or pipe-to command, or
+immediately to stdout/stderr.
POKI_RUN_COMMAND{{mlr --help-keyword tee}}HERE
- mlr put’ emitf, emitp, and emit
-send out-of-stream variables to the output record stream. These are then input
-to the following verb in a then-chain (if any), else printed to
-standard output. When redirected with >, >>, or
-|, they instead write the out-of-stream variable(s) to
-specified file(s) or pipe-to command, or immediately to
+ mlr put’s emitf, emitp, and
+emit send out-of-stream variables to the output record stream. These
+are then input to the following verb in a then-chain (if any), else
+printed to standard output. When redirected with >,
+>>, or |, they instead write the out-of-stream
+variable(s) to specified file(s) or pipe-to command, or immediately to
stdout/stderr.
POKI_RUN_COMMAND{{mlr --help-keyword emitf}}HERE
diff --git a/doc/index-snippet.txt b/doc/index-snippet.txt
index fef781d0d7..10a489d87b 100644
--- a/doc/index-snippet.txt
+++ b/doc/index-snippet.txt
@@ -16,7 +16,7 @@
$mean = $sum / $count # no assignment if count unset
'
% mlr --from infile.dat put -f analyze.mlr
-% mlr --from infile.dat put 'tee > "./taps/data-".$a."-".$b, $*'
-% mlr --from infile.dat put 'tee | "gzip > ./taps/data-".$a."-".$b.".gz", $*'
+% mlr --from infile.dat put 'tee > "./taps/data-".$a."-".$b, $*'
+% mlr --from infile.dat put 'tee | "gzip > ./taps/data-".$a."-".$b.".gz", $*'
% mlr --from infile.dat put -q '@v=$*; dump | "jq .[]"'
% mlr --from infile.dat put '(NR % 1000 == 0) { print > stderr, "Checkpoint ".NR}'
diff --git a/doc/index.html b/doc/index.html
index 4f8cc87889..08ea196af0 100644
--- a/doc/index.html
+++ b/doc/index.html
@@ -169,8 +169,8 @@
$mean = $sum / $count # no assignment if count unset
'
% mlr --from infile.dat put -f analyze.mlr
-% mlr --from infile.dat put 'tee > "./taps/data-".$a."-".$b, $*'
-% mlr --from infile.dat put 'tee | "gzip > ./taps/data-".$a."-".$b.".gz", $*'
+% mlr --from infile.dat put 'tee > "./taps/data-".$a."-".$b, $*'
+% mlr --from infile.dat put 'tee | "gzip > ./taps/data-".$a."-".$b.".gz", $*'
% mlr --from infile.dat put -q '@v=$*; dump | "jq .[]"'
% mlr --from infile.dat put '(NR % 1000 == 0) { print > stderr, "Checkpoint ".NR}'
diff --git a/doc/manpage.html b/doc/manpage.html
index 0cdd993a56..fa94c8ed91 100644
--- a/doc/manpage.html
+++ b/doc/manpage.html
@@ -185,8 +185,8 @@
}
$mean = $sum / $count # no assignment if count unset'
mlr --from infile.dat put -f analyze.mlr
- mlr --from infile.dat put 'tee > "./taps/data-".$a."-".$b, $*'
- mlr --from infile.dat put 'tee | "gzip > ./taps/data-".$a."-".$b.".gz", $*'
+ mlr --from infile.dat put 'tee > "./taps/data-".$a."-".$b, $*'
+ mlr --from infile.dat put 'tee | "gzip > ./taps/data-".$a."-".$b.".gz", $*'
mlr --from infile.dat put -q '@v=$*; dump | "jq .[]"'
mlr --from infile.dat put '(NR % 1000 == 0) { print > stderr, "Checkpoint ".NR}'
@@ -1506,34 +1506,41 @@
tee: prints the current record to specified file.
This is an immediate print to the specified file (except for pprint format
which of course waits until the end of the input stream to format all output).
+
The > and >> are for write and append, as in the shell, but (as with awk) the
- file-overwrite for > is on first write, not per record. The | is for pipe to a
- process which will process the data. There will be one subordinate process for
- each distinct value of the piped-to command. Output-formatting flags are taken
- from the main command line.
+ file-overwrite for > is on first write, not per record. The | is for piping to
+ a process which will process the data. There will be one open file for each
+ distinct file name (for > and >>) or one subordinate process for each distinct
+ value of the piped-to command (for |). Output-formatting flags are taken from
+ the main command line.
Example: mlr --from f.dat put 'tee > "/tmp/data-".$a, $*'
Example: mlr --from f.dat put 'tee >> "/tmp/data-".$a.$b, $*'
Example: mlr --from f.dat put 'tee > stderr, $*'
Example: mlr --from f.dat put -q 'tee | "tr [a-z\] [A-Z\]", $*'
Example: mlr --from f.dat put -q 'tee | "tr [a-z\] [A-Z\] > /tmp/data-".$a, $*'
+ Example: mlr --from f.dat put -q 'tee | "gzip > /tmp/data-".$a.".gz", $*'
emit
emit: inserts an out-of-stream variable into the output record stream. Hashmap
indices present in the data but not slotted by emit arguments are not output.
With >, >>, or |, the data do not become part of the output record stream but
- are instead redirected. The > and >> are for write and append, as in the
- shell, but (as with awk) the file-overwrite for > is on first write, not per
- record. The | is for pipe to a process which will process the data. There will
- be one subordinate process for each distinct value of the piped-to command.
- Output-formatting flags are taken from the main command line.
+ are instead redirected.
+
+ The > and >> are for write and append, as in the shell, but (as with awk) the
+ file-overwrite for > is on first write, not per record. The | is for piping to
+ a process which will process the data. There will be one open file for each
+ distinct file name (for > and >>) or one subordinate process for each distinct
+ value of the piped-to command (for |). Output-formatting flags are taken from
+ the main command line.
Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emit @sums'
Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emit @sums, "index1", "index2"'
Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emit @*, "index1", "index2"'
Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emit > "mytap.dat", @*, "index1", "index2"'
Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emit >> "mytap.dat", @*, "index1", "index2"'
+ Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emit | "gzip > mytap.dat.gz", @*, "index1", "index2"'
Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emit > stderr, @*, "index1", "index2"'
Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emit | "grep somepattern", @*, "index1", "index2"'
@@ -1545,17 +1552,21 @@
output concatenated with ":".
With >, >>, or |, the data do not become part of the output record stream but
- are instead redirected. The > and >> are for write and append, as in the
- shell, but (as with awk) the file-overwrite for > is on first write, not per
- record. The | is for pipe to a process which will process the data. There will
- be one subordinate process for each distinct value of the piped-to command.
- Output-formatting flags are taken from the main command line.
+ are instead redirected.
+
+ The > and >> are for write and append, as in the shell, but (as with awk) the
+ file-overwrite for > is on first write, not per record. The | is for piping to
+ a process which will process the data. There will be one open file for each
+ distinct file name (for > and >>) or one subordinate process for each distinct
+ value of the piped-to command (for |). Output-formatting flags are taken from
+ the main command line.
Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emitp @sums'
Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emitp @sums, "index1", "index2"'
Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emitp @*, "index1", "index2"'
Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emitp > "mytap.dat", @*, "index1", "index2"'
Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emitp >> "mytap.dat", @*, "index1", "index2"'
+ Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emitp | "gzip > mytap.dat.gz", @*, "index1", "index2"'
Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emitp > stderr, @*, "index1", "index2"'
Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emitp | "grep somepattern", @*, "index1", "index2"'
@@ -1566,11 +1577,14 @@
output record stream.
With >, >>, or |, the data do not become part of the output record stream but
- are instead redirected. The > and >> are for write and append, as in the
- shell, but (as with awk) the file-overwrite for > is on first write, not per
- record. The | is for pipe to a process which will process the data. There will
- be one subordinate process for each distinct value of the piped-to command.
- Output-formatting flags are taken from the main command line.
+ are instead redirected.
+
+ The > and >> are for write and append, as in the shell, but (as with awk) the
+ file-overwrite for > is on first write, not per record. The | is for piping to
+ a process which will process the data. There will be one open file for each
+ distinct file name (for > and >>) or one subordinate process for each distinct
+ value of the piped-to command (for |). Output-formatting flags are taken from
+ the main command line.
Example: mlr --from f.dat put '@a=$i;@b+=$x;@c+=$y; emitf @a'
Example: mlr --from f.dat put '@a=$i;@b+=$x;@c+=$y; emitf @a, @b, @c'
@@ -1587,10 +1601,14 @@
to stdout as JSON.
With >, >>, or |, the data do not become part of the output record stream but
- are instead redirected. The > and >> are for write and append, as in the
- shell, but (as with awk) the file-overwrite for > is on first write, not per
- record. The | is for pipe to a process which will process the data. There will
- be one subordinate process for each distinct value of the piped-to command.
+ are instead redirected.
+
+ The > and >> are for write and append, as in the shell, but (as with awk) the
+ file-overwrite for > is on first write, not per record. The | is for piping to
+ a process which will process the data. There will be one open file for each
+ distinct file name (for > and >>) or one subordinate process for each distinct
+ value of the piped-to command (for |). Output-formatting flags are taken from
+ the main command line.
Example: mlr --from f.dat put -q '@v[NR]=$*; end { dump }'
Example: mlr --from f.dat put -q '@v[NR]=$*; end { dump > "mytap.dat"}'
@@ -1648,7 +1666,7 @@
- 2016-08-11 MILLER(1)
+ 2016-08-12 MILLER(1)
diff --git a/doc/manpage.txt b/doc/manpage.txt
index 65ddad9cba..13e6918f9d 100644
--- a/doc/manpage.txt
+++ b/doc/manpage.txt
@@ -39,8 +39,8 @@ EXAMPLES
}
$mean = $sum / $count # no assignment if count unset'
mlr --from infile.dat put -f analyze.mlr
- mlr --from infile.dat put 'tee > "./taps/data-".$a."-".$b, $*'
- mlr --from infile.dat put 'tee | "gzip > ./taps/data-".$a."-".$b.".gz", $*'
+ mlr --from infile.dat put 'tee > "./taps/data-".$a."-".$b, $*'
+ mlr --from infile.dat put 'tee | "gzip > ./taps/data-".$a."-".$b.".gz", $*'
mlr --from infile.dat put -q '@v=$*; dump | "jq .[]"'
mlr --from infile.dat put '(NR % 1000 == 0) { print > stderr, "Checkpoint ".NR}'
@@ -1360,34 +1360,41 @@ KEYWORDS FOR PUT
tee: prints the current record to specified file.
This is an immediate print to the specified file (except for pprint format
which of course waits until the end of the input stream to format all output).
+
The > and >> are for write and append, as in the shell, but (as with awk) the
- file-overwrite for > is on first write, not per record. The | is for pipe to a
- process which will process the data. There will be one subordinate process for
- each distinct value of the piped-to command. Output-formatting flags are taken
- from the main command line.
+ file-overwrite for > is on first write, not per record. The | is for piping to
+ a process which will process the data. There will be one open file for each
+ distinct file name (for > and >>) or one subordinate process for each distinct
+ value of the piped-to command (for |). Output-formatting flags are taken from
+ the main command line.
Example: mlr --from f.dat put 'tee > "/tmp/data-".$a, $*'
Example: mlr --from f.dat put 'tee >> "/tmp/data-".$a.$b, $*'
Example: mlr --from f.dat put 'tee > stderr, $*'
Example: mlr --from f.dat put -q 'tee | "tr [a-z\] [A-Z\]", $*'
Example: mlr --from f.dat put -q 'tee | "tr [a-z\] [A-Z\] > /tmp/data-".$a, $*'
+ Example: mlr --from f.dat put -q 'tee | "gzip > /tmp/data-".$a.".gz", $*'
emit
emit: inserts an out-of-stream variable into the output record stream. Hashmap
indices present in the data but not slotted by emit arguments are not output.
With >, >>, or |, the data do not become part of the output record stream but
- are instead redirected. The > and >> are for write and append, as in the
- shell, but (as with awk) the file-overwrite for > is on first write, not per
- record. The | is for pipe to a process which will process the data. There will
- be one subordinate process for each distinct value of the piped-to command.
- Output-formatting flags are taken from the main command line.
+ are instead redirected.
+
+ The > and >> are for write and append, as in the shell, but (as with awk) the
+ file-overwrite for > is on first write, not per record. The | is for piping to
+ a process which will process the data. There will be one open file for each
+ distinct file name (for > and >>) or one subordinate process for each distinct
+ value of the piped-to command (for |). Output-formatting flags are taken from
+ the main command line.
Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emit @sums'
Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emit @sums, "index1", "index2"'
Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emit @*, "index1", "index2"'
Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emit > "mytap.dat", @*, "index1", "index2"'
Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emit >> "mytap.dat", @*, "index1", "index2"'
+ Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emit | "gzip > mytap.dat.gz", @*, "index1", "index2"'
Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emit > stderr, @*, "index1", "index2"'
Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emit | "grep somepattern", @*, "index1", "index2"'
@@ -1399,17 +1406,21 @@ KEYWORDS FOR PUT
output concatenated with ":".
With >, >>, or |, the data do not become part of the output record stream but
- are instead redirected. The > and >> are for write and append, as in the
- shell, but (as with awk) the file-overwrite for > is on first write, not per
- record. The | is for pipe to a process which will process the data. There will
- be one subordinate process for each distinct value of the piped-to command.
- Output-formatting flags are taken from the main command line.
+ are instead redirected.
+
+ The > and >> are for write and append, as in the shell, but (as with awk) the
+ file-overwrite for > is on first write, not per record. The | is for piping to
+ a process which will process the data. There will be one open file for each
+ distinct file name (for > and >>) or one subordinate process for each distinct
+ value of the piped-to command (for |). Output-formatting flags are taken from
+ the main command line.
Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emitp @sums'
Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emitp @sums, "index1", "index2"'
Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emitp @*, "index1", "index2"'
Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emitp > "mytap.dat", @*, "index1", "index2"'
Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emitp >> "mytap.dat", @*, "index1", "index2"'
+ Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emitp | "gzip > mytap.dat.gz", @*, "index1", "index2"'
Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emitp > stderr, @*, "index1", "index2"'
Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emitp | "grep somepattern", @*, "index1", "index2"'
@@ -1420,11 +1431,14 @@ KEYWORDS FOR PUT
output record stream.
With >, >>, or |, the data do not become part of the output record stream but
- are instead redirected. The > and >> are for write and append, as in the
- shell, but (as with awk) the file-overwrite for > is on first write, not per
- record. The | is for pipe to a process which will process the data. There will
- be one subordinate process for each distinct value of the piped-to command.
- Output-formatting flags are taken from the main command line.
+ are instead redirected.
+
+ The > and >> are for write and append, as in the shell, but (as with awk) the
+ file-overwrite for > is on first write, not per record. The | is for piping to
+ a process which will process the data. There will be one open file for each
+ distinct file name (for > and >>) or one subordinate process for each distinct
+ value of the piped-to command (for |). Output-formatting flags are taken from
+ the main command line.
Example: mlr --from f.dat put '@a=$i;@b+=$x;@c+=$y; emitf @a'
Example: mlr --from f.dat put '@a=$i;@b+=$x;@c+=$y; emitf @a, @b, @c'
@@ -1441,10 +1455,14 @@ KEYWORDS FOR PUT
to stdout as JSON.
With >, >>, or |, the data do not become part of the output record stream but
- are instead redirected. The > and >> are for write and append, as in the
- shell, but (as with awk) the file-overwrite for > is on first write, not per
- record. The | is for pipe to a process which will process the data. There will
- be one subordinate process for each distinct value of the piped-to command.
+ are instead redirected.
+
+ The > and >> are for write and append, as in the shell, but (as with awk) the
+ file-overwrite for > is on first write, not per record. The | is for piping to
+ a process which will process the data. There will be one open file for each
+ distinct file name (for > and >>) or one subordinate process for each distinct
+ value of the piped-to command (for |). Output-formatting flags are taken from
+ the main command line.
Example: mlr --from f.dat put -q '@v[NR]=$*; end { dump }'
Example: mlr --from f.dat put -q '@v[NR]=$*; end { dump > "mytap.dat"}'
@@ -1502,4 +1520,4 @@ SEE ALSO
- 2016-08-11 MILLER(1)
+ 2016-08-12 MILLER(1)
diff --git a/doc/mlr.1 b/doc/mlr.1
index bde9b61b03..533daca524 100644
--- a/doc/mlr.1
+++ b/doc/mlr.1
@@ -2,12 +2,12 @@
.\" Title: mlr
.\" Author: [see the "AUTHOR" section]
.\" Generator: ./mkman.rb
-.\" Date: 2016-08-11
+.\" Date: 2016-08-12
.\" Manual: \ \&
.\" Source: \ \&
.\" Language: English
.\"
-.TH "MILLER" "1" "2016-08-11" "\ \&" "\ \&"
+.TH "MILLER" "1" "2016-08-12" "\ \&" "\ \&"
.\" -----------------------------------------------------------------
.\" * Portability definitions
.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -64,8 +64,8 @@ mlr --from estimates.tbl put '
}
$mean = $sum / $count # no assignment if count unset'
mlr --from infile.dat put -f analyze.mlr
-mlr --from infile.dat put 'tee > "./taps/data-".$a."-".$b, $*'
-mlr --from infile.dat put 'tee | "gzip > ./taps/data-".$a."-".$b.".gz", $*'
+mlr --from infile.dat put 'tee > "./taps/data-".$a."-".$b, $*'
+mlr --from infile.dat put 'tee | "gzip > ./taps/data-".$a."-".$b.".gz", $*'
mlr --from infile.dat put -q '@v=$*; dump | "jq .[]"'
mlr --from infile.dat put '(NR % 1000 == 0) { print > stderr, "Checkpoint ".NR}'
.fi
@@ -2339,17 +2339,20 @@ unset: clears field(s) from the current record, or an out-of-stream variable.
tee: prints the current record to specified file.
This is an immediate print to the specified file (except for pprint format
which of course waits until the end of the input stream to format all output).
+
The > and >> are for write and append, as in the shell, but (as with awk) the
- file-overwrite for > is on first write, not per record. The | is for pipe to a
- process which will process the data. There will be one subordinate process for
- each distinct value of the piped-to command. Output-formatting flags are taken
- from the main command line.
+ file-overwrite for > is on first write, not per record. The | is for piping to
+ a process which will process the data. There will be one open file for each
+ distinct file name (for > and >>) or one subordinate process for each distinct
+ value of the piped-to command (for |). Output-formatting flags are taken from
+ the main command line.
Example: mlr --from f.dat put 'tee > "/tmp/data-".$a, $*'
Example: mlr --from f.dat put 'tee >> "/tmp/data-".$a.$b, $*'
Example: mlr --from f.dat put 'tee > stderr, $*'
Example: mlr --from f.dat put -q 'tee | "tr [a-z\e] [A-Z\e]", $*'
Example: mlr --from f.dat put -q 'tee | "tr [a-z\e] [A-Z\e] > /tmp/data-".$a, $*'
+ Example: mlr --from f.dat put -q 'tee | "gzip > /tmp/data-".$a.".gz", $*'
.fi
.if n \{\
.RE
@@ -2362,17 +2365,21 @@ emit: inserts an out-of-stream variable into the output record stream. Hashmap
indices present in the data but not slotted by emit arguments are not output.
With >, >>, or |, the data do not become part of the output record stream but
- are instead redirected. The > and >> are for write and append, as in the
- shell, but (as with awk) the file-overwrite for > is on first write, not per
- record. The | is for pipe to a process which will process the data. There will
- be one subordinate process for each distinct value of the piped-to command.
- Output-formatting flags are taken from the main command line.
+ are instead redirected.
+
+ The > and >> are for write and append, as in the shell, but (as with awk) the
+ file-overwrite for > is on first write, not per record. The | is for piping to
+ a process which will process the data. There will be one open file for each
+ distinct file name (for > and >>) or one subordinate process for each distinct
+ value of the piped-to command (for |). Output-formatting flags are taken from
+ the main command line.
Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emit @sums'
Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emit @sums, "index1", "index2"'
Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emit @*, "index1", "index2"'
Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emit > "mytap.dat", @*, "index1", "index2"'
Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emit >> "mytap.dat", @*, "index1", "index2"'
+ Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emit | "gzip > mytap.dat.gz", @*, "index1", "index2"'
Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emit > stderr, @*, "index1", "index2"'
Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emit | "grep somepattern", @*, "index1", "index2"'
@@ -2390,17 +2397,21 @@ emitp: inserts an out-of-stream variable into the output record stream.
output concatenated with ":".
With >, >>, or |, the data do not become part of the output record stream but
- are instead redirected. The > and >> are for write and append, as in the
- shell, but (as with awk) the file-overwrite for > is on first write, not per
- record. The | is for pipe to a process which will process the data. There will
- be one subordinate process for each distinct value of the piped-to command.
- Output-formatting flags are taken from the main command line.
+ are instead redirected.
+
+ The > and >> are for write and append, as in the shell, but (as with awk) the
+ file-overwrite for > is on first write, not per record. The | is for piping to
+ a process which will process the data. There will be one open file for each
+ distinct file name (for > and >>) or one subordinate process for each distinct
+ value of the piped-to command (for |). Output-formatting flags are taken from
+ the main command line.
Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emitp @sums'
Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emitp @sums, "index1", "index2"'
Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emitp @*, "index1", "index2"'
Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emitp > "mytap.dat", @*, "index1", "index2"'
Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emitp >> "mytap.dat", @*, "index1", "index2"'
+ Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emitp | "gzip > mytap.dat.gz", @*, "index1", "index2"'
Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emitp > stderr, @*, "index1", "index2"'
Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emitp | "grep somepattern", @*, "index1", "index2"'
@@ -2417,11 +2428,14 @@ emitf: inserts non-indexed out-of-stream variable(s) side-by-side into the
output record stream.
With >, >>, or |, the data do not become part of the output record stream but
- are instead redirected. The > and >> are for write and append, as in the
- shell, but (as with awk) the file-overwrite for > is on first write, not per
- record. The | is for pipe to a process which will process the data. There will
- be one subordinate process for each distinct value of the piped-to command.
- Output-formatting flags are taken from the main command line.
+ are instead redirected.
+
+ The > and >> are for write and append, as in the shell, but (as with awk) the
+ file-overwrite for > is on first write, not per record. The | is for piping to
+ a process which will process the data. There will be one open file for each
+ distinct file name (for > and >>) or one subordinate process for each distinct
+ value of the piped-to command (for |). Output-formatting flags are taken from
+ the main command line.
Example: mlr --from f.dat put '@a=$i;@b+=$x;@c+=$y; emitf @a'
Example: mlr --from f.dat put '@a=$i;@b+=$x;@c+=$y; emitf @a, @b, @c'
@@ -2444,10 +2458,14 @@ dump: prints all currently defined out-of-stream variables immediately
to stdout as JSON.
With >, >>, or |, the data do not become part of the output record stream but
- are instead redirected. The > and >> are for write and append, as in the
- shell, but (as with awk) the file-overwrite for > is on first write, not per
- record. The | is for pipe to a process which will process the data. There will
- be one subordinate process for each distinct value of the piped-to command.
+ are instead redirected.
+
+ The > and >> are for write and append, as in the shell, but (as with awk) the
+ file-overwrite for > is on first write, not per record. The | is for piping to
+ a process which will process the data. There will be one open file for each
+ distinct file name (for > and >>) or one subordinate process for each distinct
+ value of the piped-to command (for |). Output-formatting flags are taken from
+ the main command line.
Example: mlr --from f.dat put -q '@v[NR]=$*; end { dump }'
Example: mlr --from f.dat put -q '@v[NR]=$*; end { dump > "mytap.dat"}'
diff --git a/doc/reference.html b/doc/reference.html
index 928844966e..f6e040de73 100644
--- a/doc/reference.html
+++ b/doc/reference.html
@@ -181,7 +181,7 @@
• Emit statements for put
• Multi-emit statements for put
• Emit-all statements for put
- • Redirected output statements for put
+ • Redirected-output statements for put
• Unset statements for put
• More variable assignments for put
• Pattern-action blocks for put
@@ -337,8 +337,8 @@
}
$mean = $sum / $count # no assignment if count unset'
mlr --from infile.dat put -f analyze.mlr
- mlr --from infile.dat put 'tee > "./taps/data-".$a."-".$b, $*'
- mlr --from infile.dat put 'tee | "gzip > ./taps/data-".$a."-".$b.".gz", $*'
+ mlr --from infile.dat put 'tee > "./taps/data-".$a."-".$b, $*'
+ mlr --from infile.dat put 'tee | "gzip > ./taps/data-".$a."-".$b.".gz", $*'
mlr --from infile.dat put -q '@v=$*; dump | "jq .[]"'
mlr --from infile.dat put '(NR % 1000 == 0) { print > stderr, "Checkpoint ".NR}'
@@ -3137,23 +3137,24 @@
-Redirected output statements for put
+Redirected-output statements for put
The tee, emitf, emitp, emit, print, and
-dump keyword all allow you to redirect output to one or more files or
-pipe-to commands.
+dump keywords all allow you to redirect output to one or more files or
+pipe-to commands. The filenames/commands are strings which can be constructed
+using record-dependent values, so you can do things like splitting a table into
+multiple files, one for each account ID, and so on.
-
- Note the following:
+ Details:
mlr put sends the current record (possibly modified by the
put expression) to the output record stream. Records are then input to
the following verb in a then-chain (if any), else printed to standard
-output. The tee keyword additionally writes the output record to
-specified file(s) or pipe-to command, or immediately to
-stdout/stderr.
+output (unless put -q). The tee keyword additionally
+writes the output record to specified file(s) or pipe-to command, or
+immediately to stdout/stderr.
@@ -3162,28 +3163,31 @@
tee: prints the current record to specified file.
This is an immediate print to the specified file (except for pprint format
which of course waits until the end of the input stream to format all output).
+
The > and >> are for write and append, as in the shell, but (as with awk) the
- file-overwrite for > is on first write, not per record. The | is for pipe to a
- process which will process the data. There will be one subordinate process for
- each distinct value of the piped-to command. Output-formatting flags are taken
- from the main command line.
+ file-overwrite for > is on first write, not per record. The | is for piping to
+ a process which will process the data. There will be one open file for each
+ distinct file name (for > and >>) or one subordinate process for each distinct
+ value of the piped-to command (for |). Output-formatting flags are taken from
+ the main command line.
Example: mlr --from f.dat put 'tee > "/tmp/data-".$a, $*'
Example: mlr --from f.dat put 'tee >> "/tmp/data-".$a.$b, $*'
Example: mlr --from f.dat put 'tee > stderr, $*'
Example: mlr --from f.dat put -q 'tee | "tr [a-z\] [A-Z\]", $*'
Example: mlr --from f.dat put -q 'tee | "tr [a-z\] [A-Z\] > /tmp/data-".$a, $*'
+ Example: mlr --from f.dat put -q 'tee | "gzip > /tmp/data-".$a.".gz", $*'
- mlr put’ emitf, emitp, and emit
-send out-of-stream variables to the output record stream. These are then input
-to the following verb in a then-chain (if any), else printed to
-standard output. When redirected with >, >>, or
-|, they instead write the out-of-stream variable(s) to
-specified file(s) or pipe-to command, or immediately to
+ mlr put’s emitf, emitp, and
+emit send out-of-stream variables to the output record stream. These
+are then input to the following verb in a then-chain (if any), else
+printed to standard output. When redirected with >,
+>>, or |, they instead write the out-of-stream
+variable(s) to specified file(s) or pipe-to command, or immediately to
stdout/stderr.
@@ -3194,11 +3198,14 @@
output record stream.
With >, >>, or |, the data do not become part of the output record stream but
- are instead redirected. The > and >> are for write and append, as in the
- shell, but (as with awk) the file-overwrite for > is on first write, not per
- record. The | is for pipe to a process which will process the data. There will
- be one subordinate process for each distinct value of the piped-to command.
- Output-formatting flags are taken from the main command line.
+ are instead redirected.
+
+ The > and >> are for write and append, as in the shell, but (as with awk) the
+ file-overwrite for > is on first write, not per record. The | is for piping to
+ a process which will process the data. There will be one open file for each
+ distinct file name (for > and >>) or one subordinate process for each distinct
+ value of the piped-to command (for |). Output-formatting flags are taken from
+ the main command line.
Example: mlr --from f.dat put '@a=$i;@b+=$x;@c+=$y; emitf @a'
Example: mlr --from f.dat put '@a=$i;@b+=$x;@c+=$y; emitf @a, @b, @c'
@@ -3221,17 +3228,21 @@
output concatenated with ":".
With >, >>, or |, the data do not become part of the output record stream but
- are instead redirected. The > and >> are for write and append, as in the
- shell, but (as with awk) the file-overwrite for > is on first write, not per
- record. The | is for pipe to a process which will process the data. There will
- be one subordinate process for each distinct value of the piped-to command.
- Output-formatting flags are taken from the main command line.
+ are instead redirected.
+
+ The > and >> are for write and append, as in the shell, but (as with awk) the
+ file-overwrite for > is on first write, not per record. The | is for piping to
+ a process which will process the data. There will be one open file for each
+ distinct file name (for > and >>) or one subordinate process for each distinct
+ value of the piped-to command (for |). Output-formatting flags are taken from
+ the main command line.
Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emitp @sums'
Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emitp @sums, "index1", "index2"'
Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emitp @*, "index1", "index2"'
Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emitp > "mytap.dat", @*, "index1", "index2"'
Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emitp >> "mytap.dat", @*, "index1", "index2"'
+ Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emitp | "gzip > mytap.dat.gz", @*, "index1", "index2"'
Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emitp > stderr, @*, "index1", "index2"'
Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emitp | "grep somepattern", @*, "index1", "index2"'
@@ -3247,17 +3258,21 @@
indices present in the data but not slotted by emit arguments are not output.
With >, >>, or |, the data do not become part of the output record stream but
- are instead redirected. The > and >> are for write and append, as in the
- shell, but (as with awk) the file-overwrite for > is on first write, not per
- record. The | is for pipe to a process which will process the data. There will
- be one subordinate process for each distinct value of the piped-to command.
- Output-formatting flags are taken from the main command line.
+ are instead redirected.
+
+ The > and >> are for write and append, as in the shell, but (as with awk) the
+ file-overwrite for > is on first write, not per record. The | is for piping to
+ a process which will process the data. There will be one open file for each
+ distinct file name (for > and >>) or one subordinate process for each distinct
+ value of the piped-to command (for |). Output-formatting flags are taken from
+ the main command line.
Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emit @sums'
Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emit @sums, "index1", "index2"'
Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emit @*, "index1", "index2"'
Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emit > "mytap.dat", @*, "index1", "index2"'
Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emit >> "mytap.dat", @*, "index1", "index2"'
+ Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emit | "gzip > mytap.dat.gz", @*, "index1", "index2"'
Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emit > stderr, @*, "index1", "index2"'
Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emit | "grep somepattern", @*, "index1", "index2"'
@@ -3288,10 +3303,14 @@
to stdout as JSON.
With >, >>, or |, the data do not become part of the output record stream but
- are instead redirected. The > and >> are for write and append, as in the
- shell, but (as with awk) the file-overwrite for > is on first write, not per
- record. The | is for pipe to a process which will process the data. There will
- be one subordinate process for each distinct value of the piped-to command.
+ are instead redirected.
+
+ The > and >> are for write and append, as in the shell, but (as with awk) the
+ file-overwrite for > is on first write, not per record. The | is for piping to
+ a process which will process the data. There will be one open file for each
+ distinct file name (for > and >>) or one subordinate process for each distinct
+ value of the piped-to command (for |). Output-formatting flags are taken from
+ the main command line.
Example: mlr --from f.dat put -q '@v[NR]=$*; end { dump }'
Example: mlr --from f.dat put -q '@v[NR]=$*; end { dump > "mytap.dat"}'
@@ -5465,33 +5484,40 @@
tee: prints the current record to specified file.
This is an immediate print to the specified file (except for pprint format
which of course waits until the end of the input stream to format all output).
+
The > and >> are for write and append, as in the shell, but (as with awk) the
- file-overwrite for > is on first write, not per record. The | is for pipe to a
- process which will process the data. There will be one subordinate process for
- each distinct value of the piped-to command. Output-formatting flags are taken
- from the main command line.
+ file-overwrite for > is on first write, not per record. The | is for piping to
+ a process which will process the data. There will be one open file for each
+ distinct file name (for > and >>) or one subordinate process for each distinct
+ value of the piped-to command (for |). Output-formatting flags are taken from
+ the main command line.
Example: mlr --from f.dat put 'tee > "/tmp/data-".$a, $*'
Example: mlr --from f.dat put 'tee >> "/tmp/data-".$a.$b, $*'
Example: mlr --from f.dat put 'tee > stderr, $*'
Example: mlr --from f.dat put -q 'tee | "tr [a-z\] [A-Z\]", $*'
Example: mlr --from f.dat put -q 'tee | "tr [a-z\] [A-Z\] > /tmp/data-".$a, $*'
+ Example: mlr --from f.dat put -q 'tee | "gzip > /tmp/data-".$a.".gz", $*'
emit: inserts an out-of-stream variable into the output record stream. Hashmap
indices present in the data but not slotted by emit arguments are not output.
With >, >>, or |, the data do not become part of the output record stream but
- are instead redirected. The > and >> are for write and append, as in the
- shell, but (as with awk) the file-overwrite for > is on first write, not per
- record. The | is for pipe to a process which will process the data. There will
- be one subordinate process for each distinct value of the piped-to command.
- Output-formatting flags are taken from the main command line.
+ are instead redirected.
+
+ The > and >> are for write and append, as in the shell, but (as with awk) the
+ file-overwrite for > is on first write, not per record. The | is for piping to
+ a process which will process the data. There will be one open file for each
+ distinct file name (for > and >>) or one subordinate process for each distinct
+ value of the piped-to command (for |). Output-formatting flags are taken from
+ the main command line.
Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emit @sums'
Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emit @sums, "index1", "index2"'
Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emit @*, "index1", "index2"'
Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emit > "mytap.dat", @*, "index1", "index2"'
Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emit >> "mytap.dat", @*, "index1", "index2"'
+ Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emit | "gzip > mytap.dat.gz", @*, "index1", "index2"'
Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emit > stderr, @*, "index1", "index2"'
Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emit | "grep somepattern", @*, "index1", "index2"'
@@ -5502,17 +5528,21 @@
output concatenated with ":".
With >, >>, or |, the data do not become part of the output record stream but
- are instead redirected. The > and >> are for write and append, as in the
- shell, but (as with awk) the file-overwrite for > is on first write, not per
- record. The | is for pipe to a process which will process the data. There will
- be one subordinate process for each distinct value of the piped-to command.
- Output-formatting flags are taken from the main command line.
+ are instead redirected.
+
+ The > and >> are for write and append, as in the shell, but (as with awk) the
+ file-overwrite for > is on first write, not per record. The | is for piping to
+ a process which will process the data. There will be one open file for each
+ distinct file name (for > and >>) or one subordinate process for each distinct
+ value of the piped-to command (for |). Output-formatting flags are taken from
+ the main command line.
Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emitp @sums'
Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emitp @sums, "index1", "index2"'
Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emitp @*, "index1", "index2"'
Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emitp > "mytap.dat", @*, "index1", "index2"'
Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emitp >> "mytap.dat", @*, "index1", "index2"'
+ Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emitp | "gzip > mytap.dat.gz", @*, "index1", "index2"'
Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emitp > stderr, @*, "index1", "index2"'
Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emitp | "grep somepattern", @*, "index1", "index2"'
@@ -5522,11 +5552,14 @@
output record stream.
With >, >>, or |, the data do not become part of the output record stream but
- are instead redirected. The > and >> are for write and append, as in the
- shell, but (as with awk) the file-overwrite for > is on first write, not per
- record. The | is for pipe to a process which will process the data. There will
- be one subordinate process for each distinct value of the piped-to command.
- Output-formatting flags are taken from the main command line.
+ are instead redirected.
+
+ The > and >> are for write and append, as in the shell, but (as with awk) the
+ file-overwrite for > is on first write, not per record. The | is for piping to
+ a process which will process the data. There will be one open file for each
+ distinct file name (for > and >>) or one subordinate process for each distinct
+ value of the piped-to command (for |). Output-formatting flags are taken from
+ the main command line.
Example: mlr --from f.dat put '@a=$i;@b+=$x;@c+=$y; emitf @a'
Example: mlr --from f.dat put '@a=$i;@b+=$x;@c+=$y; emitf @a, @b, @c'
@@ -5542,10 +5575,14 @@
to stdout as JSON.
With >, >>, or |, the data do not become part of the output record stream but
- are instead redirected. The > and >> are for write and append, as in the
- shell, but (as with awk) the file-overwrite for > is on first write, not per
- record. The | is for pipe to a process which will process the data. There will
- be one subordinate process for each distinct value of the piped-to command.
+ are instead redirected.
+
+ The > and >> are for write and append, as in the shell, but (as with awk) the
+ file-overwrite for > is on first write, not per record. The | is for piping to
+ a process which will process the data. There will be one open file for each
+ distinct file name (for > and >>) or one subordinate process for each distinct
+ value of the piped-to command (for |). Output-formatting flags are taken from
+ the main command line.
Example: mlr --from f.dat put -q '@v[NR]=$*; end { dump }'
Example: mlr --from f.dat put -q '@v[NR]=$*; end { dump > "mytap.dat"}'