diff --git a/README.md b/README.md index 72dd9bfabe..0f1cb04c9d 100644 --- a/README.md +++ b/README.md @@ -34,8 +34,8 @@ indices. Examples: $mean = $sum / $count # no assignment if count unset ' % mlr --from infile.dat put -f analyze.mlr -% mlr --from infile.dat put 'tee > "./taps/data-".$a."-".$b, $*' -% mlr --from infile.dat put 'tee | "gzip > ./taps/data-".$a."-".$b.".gz", $*' +% mlr --from infile.dat put 'tee > "./taps/data-".$a."-".$b, $*' +% mlr --from infile.dat put 'tee | "gzip > ./taps/data-".$a."-".$b.".gz", $*' % mlr --from infile.dat put -q '@v=$*; dump | "jq .[]"' % mlr --from infile.dat put '(NR % 1000 == 0) { print > stderr, "Checkpoint ".NR}' ``` diff --git a/c/cli/mlrcli.c b/c/cli/mlrcli.c index ce04dc4b5d..bb7f5d5eec 100644 --- a/c/cli/mlrcli.c +++ b/c/cli/mlrcli.c @@ -248,8 +248,8 @@ static void main_usage_examples(FILE* o, char* argv0, char* leader) { fprintf(o, " }\n"); fprintf(o, " $mean = $sum / $count # no assignment if count unset'\n"); fprintf(o, "%s%s --from infile.dat put -f analyze.mlr\n", leader, argv0); - fprintf(o, "%s%s --from infile.dat put 'tee > \"./taps/data-\".$a.\"-\".$b, $*'\n", leader, argv0); - fprintf(o, "%s%s --from infile.dat put 'tee | \"gzip > ./taps/data-\".$a.\"-\".$b.\".gz\", $*'\n", leader, argv0); + fprintf(o, "%s%s --from infile.dat put 'tee > \"./taps/data-\".$a.\"-\".$b, $*'\n", leader, argv0); + fprintf(o, "%s%s --from infile.dat put 'tee | \"gzip > ./taps/data-\".$a.\"-\".$b.\".gz\", $*'\n", leader, argv0); fprintf(o, "%s%s --from infile.dat put -q '@v=$*; dump | \"jq .[]\"'\n", leader, argv0); fprintf(o, "%s%s --from infile.dat put '(NR %% 1000 == 0) { print > stderr, \"Checkpoint \".NR}'\n", leader, argv0); diff --git a/c/mapping/mlr_dsl_cst.c b/c/mapping/mlr_dsl_cst.c index dfe63cc8e8..45a0eb77a4 100644 --- a/c/mapping/mlr_dsl_cst.c +++ b/c/mapping/mlr_dsl_cst.c @@ -2670,17 +2670,20 @@ static void mlr_dsl_tee_keyword_usage(FILE* ostream) { "tee: prints the current record to specified file.\n" " This is an immediate print to the specified file (except for pprint format\n" " which of course waits until the end of the input stream to format all output).\n" + "\n" " The > and >> are for write and append, as in the shell, but (as with awk) the\n" - " file-overwrite for > is on first write, not per record. The | is for pipe to a\n" - " process which will process the data. There will be one subordinate process for\n" - " each distinct value of the piped-to command. Output-formatting flags are taken\n" - " from the main command line.\n" + " file-overwrite for > is on first write, not per record. The | is for piping to\n" + " a process which will process the data. There will be one open file for each\n" + " distinct file name (for > and >>) or one subordinate process for each distinct\n" + " value of the piped-to command (for |). 
Output-formatting flags are taken from\n" + " the main command line.\n" "\n" " Example: mlr --from f.dat put 'tee > \"/tmp/data-\".$a, $*'\n" " Example: mlr --from f.dat put 'tee >> \"/tmp/data-\".$a.$b, $*'\n" " Example: mlr --from f.dat put 'tee > stderr, $*'\n" " Example: mlr --from f.dat put -q 'tee | \"tr \[a-z\\] \[A-Z\\]\", $*'\n" - " Example: mlr --from f.dat put -q 'tee | \"tr \[a-z\\] \[A-Z\\] > /tmp/data-\".$a, $*'\n"); + " Example: mlr --from f.dat put -q 'tee | \"tr \[a-z\\] \[A-Z\\] > /tmp/data-\".$a, $*'\n" + " Example: mlr --from f.dat put -q 'tee | \"gzip > /tmp/data-\".$a.\".gz\", $*'\n"); } static void mlr_dsl_emit_keyword_usage(FILE* ostream) { @@ -2689,17 +2692,21 @@ static void mlr_dsl_emit_keyword_usage(FILE* ostream) { " indices present in the data but not slotted by emit arguments are not output.\n" "\n" " With >, >>, or |, the data do not become part of the output record stream but\n" - " are instead redirected. The > and >> are for write and append, as in the\n" - " shell, but (as with awk) the file-overwrite for > is on first write, not per\n" - " record. The | is for pipe to a process which will process the data. There will\n" - " be one subordinate process for each distinct value of the piped-to command.\n" - " Output-formatting flags are taken from the main command line.\n" + " are instead redirected.\n" + "\n" + " The > and >> are for write and append, as in the shell, but (as with awk) the\n" + " file-overwrite for > is on first write, not per record. The | is for piping to\n" + " a process which will process the data. There will be one open file for each\n" + " distinct file name (for > and >>) or one subordinate process for each distinct\n" + " value of the piped-to command (for |). Output-formatting flags are taken from\n" + " the main command line.\n" "\n" " Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emit @sums'\n" " Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emit @sums, \"index1\", \"index2\"'\n" " Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emit @*, \"index1\", \"index2\"'\n" " Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emit > \"mytap.dat\", @*, \"index1\", \"index2\"'\n" " Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emit >> \"mytap.dat\", @*, \"index1\", \"index2\"'\n" + " Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emit | \"gzip > mytap.dat.gz\", @*, \"index1\", \"index2\"'\n" " Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emit > stderr, @*, \"index1\", \"index2\"'\n" " Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emit | \"grep somepattern\", @*, \"index1\", \"index2\"'\n" "\n" @@ -2713,17 +2720,21 @@ static void mlr_dsl_emitp_keyword_usage(FILE* ostream) { " output concatenated with \":\".\n" "\n" " With >, >>, or |, the data do not become part of the output record stream but\n" - " are instead redirected. The > and >> are for write and append, as in the\n" - " shell, but (as with awk) the file-overwrite for > is on first write, not per\n" - " record. The | is for pipe to a process which will process the data. There will\n" - " be one subordinate process for each distinct value of the piped-to command.\n" - " Output-formatting flags are taken from the main command line.\n" + " are instead redirected.\n" + "\n" + " The > and >> are for write and append, as in the shell, but (as with awk) the\n" + " file-overwrite for > is on first write, not per record. The | is for piping to\n" + " a process which will process the data. 
There will be one open file for each\n" + " distinct file name (for > and >>) or one subordinate process for each distinct\n" + " value of the piped-to command (for |). Output-formatting flags are taken from\n" + " the main command line.\n" "\n" " Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emitp @sums'\n" " Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emitp @sums, \"index1\", \"index2\"'\n" " Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emitp @*, \"index1\", \"index2\"'\n" " Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emitp > \"mytap.dat\", @*, \"index1\", \"index2\"'\n" " Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emitp >> \"mytap.dat\", @*, \"index1\", \"index2\"'\n" + " Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emitp | \"gzip > mytap.dat.gz\", @*, \"index1\", \"index2\"'\n" " Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emitp > stderr, @*, \"index1\", \"index2\"'\n" " Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emitp | \"grep somepattern\", @*, \"index1\", \"index2\"'\n" "\n" @@ -2736,11 +2747,14 @@ static void mlr_dsl_emitf_keyword_usage(FILE* ostream) { " output record stream.\n" "\n" " With >, >>, or |, the data do not become part of the output record stream but\n" - " are instead redirected. The > and >> are for write and append, as in the\n" - " shell, but (as with awk) the file-overwrite for > is on first write, not per\n" - " record. The | is for pipe to a process which will process the data. There will\n" - " be one subordinate process for each distinct value of the piped-to command.\n" - " Output-formatting flags are taken from the main command line.\n" + " are instead redirected.\n" + "\n" + " The > and >> are for write and append, as in the shell, but (as with awk) the\n" + " file-overwrite for > is on first write, not per record. The | is for piping to\n" + " a process which will process the data. There will be one open file for each\n" + " distinct file name (for > and >>) or one subordinate process for each distinct\n" + " value of the piped-to command (for |). Output-formatting flags are taken from\n" + " the main command line.\n" "\n" " Example: mlr --from f.dat put '@a=$i;@b+=$x;@c+=$y; emitf @a'\n" " Example: mlr --from f.dat put '@a=$i;@b+=$x;@c+=$y; emitf @a, @b, @c'\n" @@ -2759,10 +2773,14 @@ static void mlr_dsl_dump_keyword_usage(FILE* ostream) { " to stdout as JSON.\n" "\n" " With >, >>, or |, the data do not become part of the output record stream but\n" - " are instead redirected. The > and >> are for write and append, as in the\n" - " shell, but (as with awk) the file-overwrite for > is on first write, not per\n" - " record. The | is for pipe to a process which will process the data. There will\n" - " be one subordinate process for each distinct value of the piped-to command.\n" + " are instead redirected.\n" + "\n" + " The > and >> are for write and append, as in the shell, but (as with awk) the\n" + " file-overwrite for > is on first write, not per record. The | is for piping to\n" + " a process which will process the data. There will be one open file for each\n" + " distinct file name (for > and >>) or one subordinate process for each distinct\n" + " value of the piped-to command (for |). 
Output-formatting flags are taken from\n" + " the main command line.\n" "\n" " Example: mlr --from f.dat put -q '@v[NR]=$*; end { dump }'\n" " Example: mlr --from f.dat put -q '@v[NR]=$*; end { dump > \"mytap.dat\"}'\n" diff --git a/c/output/multi_lrec_writer.c b/c/output/multi_lrec_writer.c index ec551ed8f8..98a9a5add8 100644 --- a/c/output/multi_lrec_writer.c +++ b/c/output/multi_lrec_writer.c @@ -73,11 +73,13 @@ void multi_lrec_writer_output_srec(multi_lrec_writer_t* pmlw, lrec_t* poutrec, c fflush(pstate->output_stream); } else { if (pstate->is_popen) { - if (pclose(pstate->output_stream) != 0) { - perror("pclose"); - fprintf(stderr, "%s: pclose error on \"%s\".\n", MLR_GLOBALS.bargv0, filename_or_command); - exit(1); - } + // Sadly, pclose returns an error even on well-formed commands. For example, if the popened + // command was "grep nonesuch" and the string "nonesuch" was not encountered, grep returns + // non-zero and popen flags it as an error. We cannot differentiate these from genuine + // failure cases so the best choice is to simply call pclose and ignore error codes. + // If a piped-to command does fail then it should have some output to stderr which the + // user can take advantage of. + (void)pclose(pstate->output_stream); } else { if (fclose(pstate->output_stream) != 0) { perror("fclose"); @@ -107,11 +109,13 @@ void multi_lrec_writer_drain(multi_lrec_writer_t* pmlw) { pstate->plrec_writer->pprocess_func(pstate->plrec_writer->pvstate, pstate->output_stream, NULL); fflush(pstate->output_stream); if (pstate->is_popen) { - if (pclose(pstate->output_stream) != 0) { - perror("pclose"); - fprintf(stderr, "%s: pclose error on \"%s\".\n", MLR_GLOBALS.bargv0, pstate->filename_or_command); - exit(1); - } + // Sadly, pclose returns an error even on well-formed commands. For example, if the popened + // command was "grep nonesuch" and the string "nonesuch" was not encountered, grep returns + // non-zero and popen flags it as an error. We cannot differentiate these from genuine + // failure cases so the best choice is to simply call pclose and ignore error codes. + // If a piped-to command does fail then it should have some output to stderr which the + // user can take advantage of. + (void)pclose(pstate->output_stream); } else { if (fclose(pstate->output_stream) != 0) { perror("fclose"); diff --git a/c/todo.txt b/c/todo.txt index e326e9e445..7c4a4f35f7 100644 --- a/c/todo.txt +++ b/c/todo.txt @@ -26,20 +26,6 @@ TOP OF LIST: PRE-RELEASE 4.4.0: * ignore subproc abend (e.g. grep nonesuch) + UT cases -! mld for all redirected I/O -* mld: kws not avail as boundvars & what happens if tried: "syntax error" - -cookbook: -* mlr --from ../data/big.dkvp put -q 'tee > $a.$b.".txt", $*' - wc -l ??????.txt - 40005 ekseks.txt - 40116 ekshat.txt - 40105 ekspan.txt - 40257 ekswye.txt - ... -* mlr step -a shift -* ... then put -q '' or ... then nothing -* asv et al. ---------------------------------------------------------------- FOR 4.4.0: @@ -64,6 +50,7 @@ POST-4.4.0: ? --imd ? ? put/tee --oxxx flags overlays ? +? mlr step -a shift --by {n} ---------------------------------------------------------------- ! lemon refactor diff --git a/doc/content-for-reference.html b/doc/content-for-reference.html index bf961c1c59..d118a28cb3 100644 --- a/doc/content-for-reference.html +++ b/doc/content-for-reference.html @@ -904,33 +904,34 @@

 Emit-all statements for put

 POKI_RUN_COMMAND{{mlr --from data/small --opprint put -q '@sum[$a][$b] += $x; @count[$a][$b] += 1; end{emit @*,"a","b"}'}}HERE
 POKI_RUN_COMMAND{{mlr --from data/small --opprint put -q '@sum[$a][$b] += $x; @count[$a][$b] += 1; end{emit (@sum, @count),"a","b"}'}}HERE

-Redirected output statements for put
+Redirected-output statements for put

 The tee, emitf, emitp, emit, print, and
-dump keyword all allow you to redirect output to one or more files or
-pipe-to commands.
+dump keywords all allow you to redirect output to one or more files or
+pipe-to commands. The filenames/commands are strings which can be constructed
+using record-dependent values, so you can do things like splitting a table into
+multiple files, one for each account ID, and so on.

-Note the following:
+Details:
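
As a quick sketch of the redirection forms documented in this patch, the commands below reuse the hypothetical infile.dat and the $a/$b fields from the README examples above; they are illustrative only and not part of the diff itself:

```
# One output file per distinct (a, b) pair; > truncates each file on its first write only, not per record.
mlr --from infile.dat put -q 'tee > "./taps/data-".$a."-".$b, $*'

# >> appends to each distinct file across the whole run.
mlr --from infile.dat put -q 'tee >> "./taps/data-".$a."-".$b, $*'

# | starts one subordinate process per distinct command string, here gzip per (a, b) pair.
mlr --from infile.dat put -q 'tee | "gzip > ./taps/data-".$a."-".$b.".gz", $*'
```

With put -q the records are consumed by tee rather than re-emitted on the main output stream, so the redirected taps are the only output.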