doc neaten

johnkerl · Aug 12, 2016 · 5954234 · 5954234
1 parent 59717e5
commit 5954234
Show file tree

Hide file tree

Showing 12 changed files with 313 additions and 212 deletions.
diff --git a/README.md b/README.md
@@ -34,8 +34,8 @@ indices.  Examples:
   $mean = $sum / $count # no assignment if count unset
 '
 % mlr --from infile.dat put -f analyze.mlr
-% mlr --from infile.dat put 'tee >  "./taps/data-".$a."-".$b, $*'
-% mlr --from infile.dat put 'tee |  "gzip > ./taps/data-".$a."-".$b.".gz", $*'
+% mlr --from infile.dat put 'tee > "./taps/data-".$a."-".$b, $*'
+% mlr --from infile.dat put 'tee | "gzip > ./taps/data-".$a."-".$b.".gz", $*'
 % mlr --from infile.dat put -q '@v=$*; dump | "jq .[]"'
 % mlr --from infile.dat put  '(NR % 1000 == 0) { print > stderr, "Checkpoint ".NR}'
 ```

diff --git a/c/cli/mlrcli.c b/c/cli/mlrcli.c
@@ -248,8 +248,8 @@ static void main_usage_examples(FILE* o, char* argv0, char* leader) {
 	fprintf(o, "  }\n");
 	fprintf(o, "  $mean = $sum / $count # no assignment if count unset'\n");
 	fprintf(o, "%s%s --from infile.dat put -f analyze.mlr\n", leader, argv0);
-	fprintf(o, "%s%s --from infile.dat put 'tee >  \"./taps/data-\".$a.\"-\".$b, $*'\n", leader, argv0);
-	fprintf(o, "%s%s --from infile.dat put 'tee |  \"gzip > ./taps/data-\".$a.\"-\".$b.\".gz\", $*'\n", leader, argv0);
+	fprintf(o, "%s%s --from infile.dat put 'tee > \"./taps/data-\".$a.\"-\".$b, $*'\n", leader, argv0);
+	fprintf(o, "%s%s --from infile.dat put 'tee | \"gzip > ./taps/data-\".$a.\"-\".$b.\".gz\", $*'\n", leader, argv0);
 	fprintf(o, "%s%s --from infile.dat put -q '@v=$*; dump | \"jq .[]\"'\n", leader, argv0);
 	fprintf(o, "%s%s --from infile.dat put  '(NR %% 1000 == 0) { print > stderr, \"Checkpoint \".NR}'\n",
 		leader, argv0);

diff --git a/c/mapping/mlr_dsl_cst.c b/c/mapping/mlr_dsl_cst.c
@@ -2670,17 +2670,20 @@ static void mlr_dsl_tee_keyword_usage(FILE* ostream) {
 		"tee: prints the current record to specified file.\n"
 		"  This is an immediate print to the specified file (except for pprint format\n"
 		"  which of course waits until the end of the input stream to format all output).\n"
+		"\n"
 		"  The > and >> are for write and append, as in the shell, but (as with awk) the\n"
-		"  file-overwrite for > is on first write, not per record. The | is for pipe to a\n"
-		"  process which will process the data. There will be one subordinate process for\n"
-		"  each distinct value of the piped-to command. Output-formatting flags are taken\n"
-		"  from the main command line.\n"
+		"  file-overwrite for > is on first write, not per record. The | is for piping to\n"
+		"  a process which will process the data. There will be one open file for each\n"
+		"  distinct file name (for > and >>) or one subordinate process for each distinct\n"
+		"  value of the piped-to command (for |). Output-formatting flags are taken from\n"
+		"  the main command line.\n"
 		"\n"
 		"  Example: mlr --from f.dat put 'tee >  \"/tmp/data-\".$a, $*'\n"
 		"  Example: mlr --from f.dat put 'tee >> \"/tmp/data-\".$a.$b, $*'\n"
 		"  Example: mlr --from f.dat put 'tee >  stderr, $*'\n"
 		"  Example: mlr --from f.dat put -q 'tee | \"tr \[a-z\\] \[A-Z\\]\", $*'\n"
-		"  Example: mlr --from f.dat put -q 'tee | \"tr \[a-z\\] \[A-Z\\] > /tmp/data-\".$a, $*'\n");
+		"  Example: mlr --from f.dat put -q 'tee | \"tr \[a-z\\] \[A-Z\\] > /tmp/data-\".$a, $*'\n"
+		"  Example: mlr --from f.dat put -q 'tee | \"gzip > /tmp/data-\".$a.\".gz\", $*'\n");
 }
 
 static void mlr_dsl_emit_keyword_usage(FILE* ostream) {
@@ -2689,17 +2692,21 @@ static void mlr_dsl_emit_keyword_usage(FILE* ostream) {
 		"  indices present in the data but not slotted by emit arguments are not output.\n"
 		"\n"
 		"  With >, >>, or |, the data do not become part of the output record stream but\n"
-		"  are instead redirected.  The > and >> are for write and append, as in the\n"
-		"  shell, but (as with awk) the file-overwrite for > is on first write, not per\n"
-		"  record. The | is for pipe to a process which will process the data. There will\n"
-		"  be one subordinate process for each distinct value of the piped-to command.\n"
-		"  Output-formatting flags are taken from the main command line.\n"
+		"  are instead redirected.\n"
+		"\n"
+		"  The > and >> are for write and append, as in the shell, but (as with awk) the\n"
+		"  file-overwrite for > is on first write, not per record. The | is for piping to\n"
+		"  a process which will process the data. There will be one open file for each\n"
+		"  distinct file name (for > and >>) or one subordinate process for each distinct\n"
+		"  value of the piped-to command (for |). Output-formatting flags are taken from\n"
+		"  the main command line.\n"
 		"\n"
 		"  Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emit @sums'\n"
 		"  Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emit @sums, \"index1\", \"index2\"'\n"
 		"  Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emit @*, \"index1\", \"index2\"'\n"
 		"  Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emit >  \"mytap.dat\", @*, \"index1\", \"index2\"'\n"
 		"  Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emit >> \"mytap.dat\", @*, \"index1\", \"index2\"'\n"
+		"  Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emit | \"gzip > mytap.dat.gz\", @*, \"index1\", \"index2\"'\n"
 		"  Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emit > stderr, @*, \"index1\", \"index2\"'\n"
 		"  Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emit | \"grep somepattern\", @*, \"index1\", \"index2\"'\n"
 		"\n"
@@ -2713,17 +2720,21 @@ static void mlr_dsl_emitp_keyword_usage(FILE* ostream) {
 		"  output concatenated with \":\".\n"
 		"\n"
 		"  With >, >>, or |, the data do not become part of the output record stream but\n"
-		"  are instead redirected.  The > and >> are for write and append, as in the\n"
-		"  shell, but (as with awk) the file-overwrite for > is on first write, not per\n"
-		"  record. The | is for pipe to a process which will process the data. There will\n"
-		"  be one subordinate process for each distinct value of the piped-to command.\n"
-		"  Output-formatting flags are taken from the main command line.\n"
+		"  are instead redirected.\n"
+		"\n"
+		"  The > and >> are for write and append, as in the shell, but (as with awk) the\n"
+		"  file-overwrite for > is on first write, not per record. The | is for piping to\n"
+		"  a process which will process the data. There will be one open file for each\n"
+		"  distinct file name (for > and >>) or one subordinate process for each distinct\n"
+		"  value of the piped-to command (for |). Output-formatting flags are taken from\n"
+		"  the main command line.\n"
 		"\n"
 		"  Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emitp @sums'\n"
 		"  Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emitp @sums, \"index1\", \"index2\"'\n"
 		"  Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emitp @*, \"index1\", \"index2\"'\n"
 		"  Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emitp >  \"mytap.dat\", @*, \"index1\", \"index2\"'\n"
 		"  Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emitp >> \"mytap.dat\", @*, \"index1\", \"index2\"'\n"
+		"  Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emitp | \"gzip > mytap.dat.gz\", @*, \"index1\", \"index2\"'\n"
 		"  Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emitp > stderr, @*, \"index1\", \"index2\"'\n"
 		"  Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emitp | \"grep somepattern\", @*, \"index1\", \"index2\"'\n"
 		"\n"
@@ -2736,11 +2747,14 @@ static void mlr_dsl_emitf_keyword_usage(FILE* ostream) {
 		"  output record stream.\n"
 		"\n"
 		"  With >, >>, or |, the data do not become part of the output record stream but\n"
-		"  are instead redirected.  The > and >> are for write and append, as in the\n"
-		"  shell, but (as with awk) the file-overwrite for > is on first write, not per\n"
-		"  record. The | is for pipe to a process which will process the data. There will\n"
-		"  be one subordinate process for each distinct value of the piped-to command.\n"
-		"  Output-formatting flags are taken from the main command line.\n"
+		"  are instead redirected.\n"
+		"\n"
+		"  The > and >> are for write and append, as in the shell, but (as with awk) the\n"
+		"  file-overwrite for > is on first write, not per record. The | is for piping to\n"
+		"  a process which will process the data. There will be one open file for each\n"
+		"  distinct file name (for > and >>) or one subordinate process for each distinct\n"
+		"  value of the piped-to command (for |). Output-formatting flags are taken from\n"
+		"  the main command line.\n"
 		"\n"
 		"  Example: mlr --from f.dat put '@a=$i;@b+=$x;@c+=$y; emitf @a'\n"
 		"  Example: mlr --from f.dat put '@a=$i;@b+=$x;@c+=$y; emitf @a, @b, @c'\n"
@@ -2759,10 +2773,14 @@ static void mlr_dsl_dump_keyword_usage(FILE* ostream) {
 		"  to stdout as JSON.\n"
 		"\n"
 		"  With >, >>, or |, the data do not become part of the output record stream but\n"
-		"  are instead redirected.  The > and >> are for write and append, as in the\n"
-		"  shell, but (as with awk) the file-overwrite for > is on first write, not per\n"
-		"  record. The | is for pipe to a process which will process the data. There will\n"
-		"  be one subordinate process for each distinct value of the piped-to command.\n"
+		"  are instead redirected.\n"
+		"\n"
+		"  The > and >> are for write and append, as in the shell, but (as with awk) the\n"
+		"  file-overwrite for > is on first write, not per record. The | is for piping to\n"
+		"  a process which will process the data. There will be one open file for each\n"
+		"  distinct file name (for > and >>) or one subordinate process for each distinct\n"
+		"  value of the piped-to command (for |). Output-formatting flags are taken from\n"
+		"  the main command line.\n"
 		"\n"
 		"  Example: mlr --from f.dat put -q '@v[NR]=$*; end { dump }'\n"
 		"  Example: mlr --from f.dat put -q '@v[NR]=$*; end { dump >  \"mytap.dat\"}'\n"

diff --git a/c/output/multi_lrec_writer.c b/c/output/multi_lrec_writer.c
@@ -73,11 +73,13 @@ void multi_lrec_writer_output_srec(multi_lrec_writer_t* pmlw, lrec_t* poutrec, c
 			fflush(pstate->output_stream);
 	} else {
 		if (pstate->is_popen) {
-			if (pclose(pstate->output_stream) != 0) {
-				perror("pclose");
-				fprintf(stderr, "%s: pclose error on \"%s\".\n", MLR_GLOBALS.bargv0, filename_or_command);
-				exit(1);
-			}
+			// Sadly, pclose returns non-zero even on well-formed commands. For example, if the popened
+			// command was "grep nonesuch" and the string "nonesuch" was not encountered, grep exits
+			// non-zero and pclose reports that exit status. We cannot differentiate these from genuine
+			// failure cases so the best choice is to simply call pclose and ignore error codes.
+			// If a piped-to command does fail then it should have some output to stderr which the
+			// user can take advantage of.
+			(void)pclose(pstate->output_stream);
 		} else {
 			if (fclose(pstate->output_stream) != 0) {
 				perror("fclose");
@@ -107,11 +109,13 @@ void multi_lrec_writer_drain(multi_lrec_writer_t* pmlw) {
 		pstate->plrec_writer->pprocess_func(pstate->plrec_writer->pvstate, pstate->output_stream, NULL);
 		fflush(pstate->output_stream);
 		if (pstate->is_popen) {
-			if (pclose(pstate->output_stream) != 0) {
-				perror("pclose");
-				fprintf(stderr, "%s: pclose error on \"%s\".\n", MLR_GLOBALS.bargv0, pstate->filename_or_command);
-				exit(1);
-			}
+			// Sadly, pclose returns non-zero even on well-formed commands. For example, if the popened
+			// command was "grep nonesuch" and the string "nonesuch" was not encountered, grep exits
+			// non-zero and pclose reports that exit status. We cannot differentiate these from genuine
+			// failure cases so the best choice is to simply call pclose and ignore error codes.
+			// If a piped-to command does fail then it should have some output to stderr which the
+			// user can take advantage of.
+			(void)pclose(pstate->output_stream);
 		} else {
 			if (fclose(pstate->output_stream) != 0) {
 				perror("fclose");

diff --git a/c/todo.txt b/c/todo.txt
@@ -26,20 +26,6 @@ TOP OF LIST:
 PRE-RELEASE 4.4.0:
 
 * ignore subproc abend (e.g. grep nonesuch) + UT cases
-! mld for all redirected I/O
-* mld: kws not avail as boundvars & what happens if tried: "syntax error"
-
-cookbook:
-* mlr --from ../data/big.dkvp put -q 'tee > $a.$b.".txt", $*'
-  wc -l ??????.txt
-     40005 ekseks.txt
-     40116 ekshat.txt
-     40105 ekspan.txt
-     40257 ekswye.txt
-     ...
-* mlr step -a shift
-* ... then put -q '' or ... then nothing
-* asv et al.
 
 ----------------------------------------------------------------
 FOR 4.4.0:
@@ -64,6 +50,7 @@ POST-4.4.0:
 
 ? --imd ?
 ? put/tee --oxxx flags overlays ?
+? mlr step -a shift --by {n}
 
 ----------------------------------------------------------------
 ! lemon refactor

diff --git a/doc/content-for-reference.html b/doc/content-for-reference.html
@@ -904,33 +904,34 @@ <h3>Emit-all statements for put</h3>
 POKI_RUN_COMMAND{{mlr --from data/small --opprint put -q '@sum[$a][$b] += $x; @count[$a][$b] += 1; end{emit @*,"a","b"}'}}HERE
 POKI_RUN_COMMAND{{mlr --from data/small --opprint put -q '@sum[$a][$b] += $x; @count[$a][$b] += 1; end{emit (@sum, @count),"a","b"}'}}HERE
 
-<h3>Redirected output statements for put</h3>
+<h3>Redirected-output statements for put</h3>
 
 The <b>tee</b>, <b>emitf</b>, <b>emitp</b>, <b>emit</b>, <b>print</b>, and
-<b>dump</b> keyword all allow you to redirect output to one or more files or
-pipe-to commands.
+<b>dump</b> keywords all allow you to redirect output to one or more files or
+pipe-to commands. The filenames/commands are strings which can be constructed
+using record-dependent values, so you can do things like splitting a table into
+multiple files, one for each account ID, and so on.
 
-
-<p/> Note the following:
+<p/> Details:
 
 <ul>
 
 <li/> <tt>mlr put</tt> sends the current record (possibly modified by the
 <tt>put</tt> expression) to the output record stream. Records are then input to
 the following verb in a <tt>then</tt>-chain (if any), else printed to standard
-output. The <b>tee</b> keyword <i>additionally</i> writes the output record to
-specified file(s) or pipe-to command, or immediately to
-<tt>stdout</tt>/<tt>stderr</tt>.
+output (unless <tt>put -q</tt>). The <b>tee</b> keyword <i>additionally</i>
+writes the output record to specified file(s) or pipe-to command, or
+immediately to <tt>stdout</tt>/<tt>stderr</tt>.
 
 POKI_RUN_COMMAND{{mlr --help-keyword tee}}HERE
 
 
-<li/> <tt>mlr put</tt>&rsquo; <tt>emitf</tt>, <tt>emitp</tt>, and <tt>emit</tt>
-send out-of-stream variables to the output record stream. These are then input
-to the following verb in a <tt>then</tt>-chain (if any), else printed to
-standard output. When redirected with <tt>&gt;</tt>, <tt>&gt;&gt;</tt>, or
-<tt>|</tt>, they <i>instead</i> write the out-of-stream variable(s) to
-specified file(s) or pipe-to command, or immediately to
+<li/> <tt>mlr put</tt>&rsquo;s <tt>emitf</tt>, <tt>emitp</tt>, and
+<tt>emit</tt> send out-of-stream variables to the output record stream. These
+are then input to the following verb in a <tt>then</tt>-chain (if any), else
+printed to standard output. When redirected with <tt>&gt;</tt>,
+<tt>&gt;&gt;</tt>, or <tt>|</tt>, they <i>instead</i> write the out-of-stream
+variable(s) to specified file(s) or pipe-to command, or immediately to
 <tt>stdout</tt>/<tt>stderr</tt>.
 
 POKI_RUN_COMMAND{{mlr --help-keyword emitf}}HERE

diff --git a/doc/index-snippet.txt b/doc/index-snippet.txt
@@ -16,7 +16,7 @@
   $mean = $sum / $count # no assignment if count unset
 '
 % mlr --from infile.dat put -f analyze.mlr
-% mlr --from infile.dat put 'tee >  "./taps/data-".$a."-".$b, $*'
-% mlr --from infile.dat put 'tee |  "gzip > ./taps/data-".$a."-".$b.".gz", $*'
+% mlr --from infile.dat put 'tee > "./taps/data-".$a."-".$b, $*'
+% mlr --from infile.dat put 'tee | "gzip > ./taps/data-".$a."-".$b.".gz", $*'
 % mlr --from infile.dat put -q '@v=$*; dump | "jq .[]"'
 % mlr --from infile.dat put  '(NR % 1000 == 0) { print > stderr, "Checkpoint ".NR}'
diff --git a/doc/index.html b/doc/index.html
@@ -169,8 +169,8 @@
   $mean = $sum / $count # no assignment if count unset
 '
 % mlr --from infile.dat put -f analyze.mlr
-% mlr --from infile.dat put 'tee &gt;  "./taps/data-".$a."-".$b, $*'
-% mlr --from infile.dat put 'tee |  "gzip &gt; ./taps/data-".$a."-".$b.".gz", $*'
+% mlr --from infile.dat put 'tee &gt; "./taps/data-".$a."-".$b, $*'
+% mlr --from infile.dat put 'tee | "gzip &gt; ./taps/data-".$a."-".$b.".gz", $*'
 % mlr --from infile.dat put -q '@v=$*; dump | "jq .[]"'
 % mlr --from infile.dat put  '(NR % 1000 == 0) { print &gt; stderr, "Checkpoint ".NR}'
 </pre>