Skip to content

Commit

Permalink
doc neaten
Browse files Browse the repository at this point in the history
  • Loading branch information
johnkerl committed Aug 12, 2016
1 parent 59717e5 commit 5954234
Show file tree
Hide file tree
Showing 12 changed files with 313 additions and 212 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@ indices. Examples:
$mean = $sum / $count # no assignment if count unset
'
% mlr --from infile.dat put -f analyze.mlr
% mlr --from infile.dat put 'tee > "./taps/data-".$a."-".$b, $*'
% mlr --from infile.dat put 'tee | "gzip > ./taps/data-".$a."-".$b.".gz", $*'
% mlr --from infile.dat put 'tee > "./taps/data-".$a."-".$b, $*'
% mlr --from infile.dat put 'tee | "gzip > ./taps/data-".$a."-".$b.".gz", $*'
% mlr --from infile.dat put -q '@v=$*; dump | "jq .[]"'
% mlr --from infile.dat put '(NR % 1000 == 0) { print > stderr, "Checkpoint ".NR}'
```
Expand Down
4 changes: 2 additions & 2 deletions c/cli/mlrcli.c
Original file line number Diff line number Diff line change
Expand Up @@ -248,8 +248,8 @@ static void main_usage_examples(FILE* o, char* argv0, char* leader) {
fprintf(o, " }\n");
fprintf(o, " $mean = $sum / $count # no assignment if count unset'\n");
fprintf(o, "%s%s --from infile.dat put -f analyze.mlr\n", leader, argv0);
fprintf(o, "%s%s --from infile.dat put 'tee > \"./taps/data-\".$a.\"-\".$b, $*'\n", leader, argv0);
fprintf(o, "%s%s --from infile.dat put 'tee | \"gzip > ./taps/data-\".$a.\"-\".$b.\".gz\", $*'\n", leader, argv0);
fprintf(o, "%s%s --from infile.dat put 'tee > \"./taps/data-\".$a.\"-\".$b, $*'\n", leader, argv0);
fprintf(o, "%s%s --from infile.dat put 'tee | \"gzip > ./taps/data-\".$a.\"-\".$b.\".gz\", $*'\n", leader, argv0);
fprintf(o, "%s%s --from infile.dat put -q '@v=$*; dump | \"jq .[]\"'\n", leader, argv0);
fprintf(o, "%s%s --from infile.dat put '(NR %% 1000 == 0) { print > stderr, \"Checkpoint \".NR}'\n",
leader, argv0);
Expand Down
66 changes: 42 additions & 24 deletions c/mapping/mlr_dsl_cst.c
Original file line number Diff line number Diff line change
Expand Up @@ -2670,17 +2670,20 @@ static void mlr_dsl_tee_keyword_usage(FILE* ostream) {
"tee: prints the current record to specified file.\n"
" This is an immediate print to the specified file (except for pprint format\n"
" which of course waits until the end of the input stream to format all output).\n"
"\n"
" The > and >> are for write and append, as in the shell, but (as with awk) the\n"
" file-overwrite for > is on first write, not per record. The | is for pipe to a\n"
" process which will process the data. There will be one subordinate process for\n"
" each distinct value of the piped-to command. Output-formatting flags are taken\n"
" from the main command line.\n"
" file-overwrite for > is on first write, not per record. The | is for piping to\n"
" a process which will process the data. There will be one open file for each\n"
" distinct file name (for > and >>) or one subordinate process for each distinct\n"
" value of the piped-to command (for |). Output-formatting flags are taken from\n"
" the main command line.\n"
"\n"
" Example: mlr --from f.dat put 'tee > \"/tmp/data-\".$a, $*'\n"
" Example: mlr --from f.dat put 'tee >> \"/tmp/data-\".$a.$b, $*'\n"
" Example: mlr --from f.dat put 'tee > stderr, $*'\n"
" Example: mlr --from f.dat put -q 'tee | \"tr \[a-z\\] \[A-Z\\]\", $*'\n"
" Example: mlr --from f.dat put -q 'tee | \"tr \[a-z\\] \[A-Z\\] > /tmp/data-\".$a, $*'\n");
" Example: mlr --from f.dat put -q 'tee | \"tr \[a-z\\] \[A-Z\\] > /tmp/data-\".$a, $*'\n"
" Example: mlr --from f.dat put -q 'tee | \"gzip > /tmp/data-\".$a.\".gz\", $*'\n");
}

static void mlr_dsl_emit_keyword_usage(FILE* ostream) {
Expand All @@ -2689,17 +2692,21 @@ static void mlr_dsl_emit_keyword_usage(FILE* ostream) {
" indices present in the data but not slotted by emit arguments are not output.\n"
"\n"
" With >, >>, or |, the data do not become part of the output record stream but\n"
" are instead redirected. The > and >> are for write and append, as in the\n"
" shell, but (as with awk) the file-overwrite for > is on first write, not per\n"
" record. The | is for pipe to a process which will process the data. There will\n"
" be one subordinate process for each distinct value of the piped-to command.\n"
" Output-formatting flags are taken from the main command line.\n"
" are instead redirected.\n"
"\n"
" The > and >> are for write and append, as in the shell, but (as with awk) the\n"
" file-overwrite for > is on first write, not per record. The | is for piping to\n"
" a process which will process the data. There will be one open file for each\n"
" distinct file name (for > and >>) or one subordinate process for each distinct\n"
" value of the piped-to command (for |). Output-formatting flags are taken from\n"
" the main command line.\n"
"\n"
" Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emit @sums'\n"
" Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emit @sums, \"index1\", \"index2\"'\n"
" Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emit @*, \"index1\", \"index2\"'\n"
" Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emit > \"mytap.dat\", @*, \"index1\", \"index2\"'\n"
" Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emit >> \"mytap.dat\", @*, \"index1\", \"index2\"'\n"
" Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emit | \"gzip > mytap.dat.gz\", @*, \"index1\", \"index2\"'\n"
" Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emit > stderr, @*, \"index1\", \"index2\"'\n"
" Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emit | \"grep somepattern\", @*, \"index1\", \"index2\"'\n"
"\n"
Expand All @@ -2713,17 +2720,21 @@ static void mlr_dsl_emitp_keyword_usage(FILE* ostream) {
" output concatenated with \":\".\n"
"\n"
" With >, >>, or |, the data do not become part of the output record stream but\n"
" are instead redirected. The > and >> are for write and append, as in the\n"
" shell, but (as with awk) the file-overwrite for > is on first write, not per\n"
" record. The | is for pipe to a process which will process the data. There will\n"
" be one subordinate process for each distinct value of the piped-to command.\n"
" Output-formatting flags are taken from the main command line.\n"
" are instead redirected.\n"
"\n"
" The > and >> are for write and append, as in the shell, but (as with awk) the\n"
" file-overwrite for > is on first write, not per record. The | is for piping to\n"
" a process which will process the data. There will be one open file for each\n"
" distinct file name (for > and >>) or one subordinate process for each distinct\n"
" value of the piped-to command (for |). Output-formatting flags are taken from\n"
" the main command line.\n"
"\n"
" Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emitp @sums'\n"
" Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emitp @sums, \"index1\", \"index2\"'\n"
" Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emitp @*, \"index1\", \"index2\"'\n"
" Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emitp > \"mytap.dat\", @*, \"index1\", \"index2\"'\n"
" Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emitp >> \"mytap.dat\", @*, \"index1\", \"index2\"'\n"
" Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emitp | \"gzip > mytap.dat.gz\", @*, \"index1\", \"index2\"'\n"
" Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emitp > stderr, @*, \"index1\", \"index2\"'\n"
" Example: mlr --from f.dat put '@sums[$a][$b]+=$x; emitp | \"grep somepattern\", @*, \"index1\", \"index2\"'\n"
"\n"
Expand All @@ -2736,11 +2747,14 @@ static void mlr_dsl_emitf_keyword_usage(FILE* ostream) {
" output record stream.\n"
"\n"
" With >, >>, or |, the data do not become part of the output record stream but\n"
" are instead redirected. The > and >> are for write and append, as in the\n"
" shell, but (as with awk) the file-overwrite for > is on first write, not per\n"
" record. The | is for pipe to a process which will process the data. There will\n"
" be one subordinate process for each distinct value of the piped-to command.\n"
" Output-formatting flags are taken from the main command line.\n"
" are instead redirected.\n"
"\n"
" The > and >> are for write and append, as in the shell, but (as with awk) the\n"
" file-overwrite for > is on first write, not per record. The | is for piping to\n"
" a process which will process the data. There will be one open file for each\n"
" distinct file name (for > and >>) or one subordinate process for each distinct\n"
" value of the piped-to command (for |). Output-formatting flags are taken from\n"
" the main command line.\n"
"\n"
" Example: mlr --from f.dat put '@a=$i;@b+=$x;@c+=$y; emitf @a'\n"
" Example: mlr --from f.dat put '@a=$i;@b+=$x;@c+=$y; emitf @a, @b, @c'\n"
Expand All @@ -2759,10 +2773,14 @@ static void mlr_dsl_dump_keyword_usage(FILE* ostream) {
" to stdout as JSON.\n"
"\n"
" With >, >>, or |, the data do not become part of the output record stream but\n"
" are instead redirected. The > and >> are for write and append, as in the\n"
" shell, but (as with awk) the file-overwrite for > is on first write, not per\n"
" record. The | is for pipe to a process which will process the data. There will\n"
" be one subordinate process for each distinct value of the piped-to command.\n"
" are instead redirected.\n"
"\n"
" The > and >> are for write and append, as in the shell, but (as with awk) the\n"
" file-overwrite for > is on first write, not per record. The | is for piping to\n"
" a process which will process the data. There will be one open file for each\n"
" distinct file name (for > and >>) or one subordinate process for each distinct\n"
" value of the piped-to command (for |). Output-formatting flags are taken from\n"
" the main command line.\n"
"\n"
" Example: mlr --from f.dat put -q '@v[NR]=$*; end { dump }'\n"
" Example: mlr --from f.dat put -q '@v[NR]=$*; end { dump > \"mytap.dat\"}'\n"
Expand Down
24 changes: 14 additions & 10 deletions c/output/multi_lrec_writer.c
Original file line number Diff line number Diff line change
Expand Up @@ -73,11 +73,13 @@ void multi_lrec_writer_output_srec(multi_lrec_writer_t* pmlw, lrec_t* poutrec, c
fflush(pstate->output_stream);
} else {
if (pstate->is_popen) {
if (pclose(pstate->output_stream) != 0) {
perror("pclose");
fprintf(stderr, "%s: pclose error on \"%s\".\n", MLR_GLOBALS.bargv0, filename_or_command);
exit(1);
}
// Sadly, pclose returns non-zero even for well-formed commands. For example, if the popened
// command was "grep nonesuch" and the string "nonesuch" was not encountered, grep exits
// non-zero and pclose reports that as an error. We cannot differentiate these from genuine
// failure cases so the best choice is to simply call pclose and ignore error codes.
// If a piped-to command does fail then it should have some output to stderr which the
// user can take advantage of.
(void)pclose(pstate->output_stream);
} else {
if (fclose(pstate->output_stream) != 0) {
perror("fclose");
Expand Down Expand Up @@ -107,11 +109,13 @@ void multi_lrec_writer_drain(multi_lrec_writer_t* pmlw) {
pstate->plrec_writer->pprocess_func(pstate->plrec_writer->pvstate, pstate->output_stream, NULL);
fflush(pstate->output_stream);
if (pstate->is_popen) {
if (pclose(pstate->output_stream) != 0) {
perror("pclose");
fprintf(stderr, "%s: pclose error on \"%s\".\n", MLR_GLOBALS.bargv0, pstate->filename_or_command);
exit(1);
}
// Sadly, pclose returns non-zero even for well-formed commands. For example, if the popened
// command was "grep nonesuch" and the string "nonesuch" was not encountered, grep exits
// non-zero and pclose reports that as an error. We cannot differentiate these from genuine
// failure cases so the best choice is to simply call pclose and ignore error codes.
// If a piped-to command does fail then it should have some output to stderr which the
// user can take advantage of.
(void)pclose(pstate->output_stream);
} else {
if (fclose(pstate->output_stream) != 0) {
perror("fclose");
Expand Down
15 changes: 1 addition & 14 deletions c/todo.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,20 +26,6 @@ TOP OF LIST:
PRE-RELEASE 4.4.0:

* ignore subproc abend (e.g. grep nonesuch) + UT cases
! mld for all redirected I/O
* mld: kws not avail as boundvars & what happens if tried: "syntax error"

cookbook:
* mlr --from ../data/big.dkvp put -q 'tee > $a.$b.".txt", $*'
wc -l ??????.txt
40005 ekseks.txt
40116 ekshat.txt
40105 ekspan.txt
40257 ekswye.txt
...
* mlr step -a shift
* ... then put -q '' or ... then nothing
* asv et al.

----------------------------------------------------------------
FOR 4.4.0:
Expand All @@ -64,6 +50,7 @@ POST-4.4.0:

? --imd ?
? put/tee --oxxx flags overlays ?
? mlr step -a shift --by {n}

----------------------------------------------------------------
! lemon refactor
Expand Down
29 changes: 15 additions & 14 deletions doc/content-for-reference.html
Original file line number Diff line number Diff line change
Expand Up @@ -904,33 +904,34 @@ <h3>Emit-all statements for put</h3>
POKI_RUN_COMMAND{{mlr --from data/small --opprint put -q '@sum[$a][$b] += $x; @count[$a][$b] += 1; end{emit @*,"a","b"}'}}HERE
POKI_RUN_COMMAND{{mlr --from data/small --opprint put -q '@sum[$a][$b] += $x; @count[$a][$b] += 1; end{emit (@sum, @count),"a","b"}'}}HERE

<h3>Redirected output statements for put</h3>
<h3>Redirected-output statements for put</h3>

The <b>tee</b>, <b>emitf</b>, <b>emitp</b>, <b>emit</b>, <b>print</b>, and
<b>dump</b> keyword all allow you to redirect output to one or more files or
pipe-to commands.
<b>dump</b> keywords all allow you to redirect output to one or more files or
pipe-to commands. The filenames/commands are strings which can be constructed
using record-dependent values, so you can do things like splitting a table into
multiple files, one for each account ID, and so on.


<p/> Note the following:
<p/> Details:

<ul>

<li/> <tt>mlr put</tt> sends the current record (possibly modified by the
<tt>put</tt> expression) to the output record stream. Records are then input to
the following verb in a <tt>then</tt>-chain (if any), else printed to standard
output. The <b>tee</b> keyword <i>additionally</i> writes the output record to
specified file(s) or pipe-to command, or immediately to
<tt>stdout</tt>/<tt>stderr</tt>.
output (unless <tt>put -q</tt>). The <b>tee</b> keyword <i>additionally</i>
writes the output record to specified file(s) or pipe-to command, or
immediately to <tt>stdout</tt>/<tt>stderr</tt>.

POKI_RUN_COMMAND{{mlr --help-keyword tee}}HERE


<li/> <tt>mlr put</tt>&rsquo; <tt>emitf</tt>, <tt>emitp</tt>, and <tt>emit</tt>
send out-of-stream variables to the output record stream. These are then input
to the following verb in a <tt>then</tt>-chain (if any), else printed to
standard output. When redirected with <tt>&gt;</tt>, <tt>&gt;&gt;</tt>, or
<tt>|</tt>, they <i>instead</i> write the out-of-stream variable(s) to
specified file(s) or pipe-to command, or immediately to
<li/> <tt>mlr put</tt>&rsquo;s <tt>emitf</tt>, <tt>emitp</tt>, and
<tt>emit</tt> send out-of-stream variables to the output record stream. These
are then input to the following verb in a <tt>then</tt>-chain (if any), else
printed to standard output. When redirected with <tt>&gt;</tt>,
<tt>&gt;&gt;</tt>, or <tt>|</tt>, they <i>instead</i> write the out-of-stream
variable(s) to specified file(s) or pipe-to command, or immediately to
<tt>stdout</tt>/<tt>stderr</tt>.

POKI_RUN_COMMAND{{mlr --help-keyword emitf}}HERE
Expand Down
4 changes: 2 additions & 2 deletions doc/index-snippet.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
$mean = $sum / $count # no assignment if count unset
'
% mlr --from infile.dat put -f analyze.mlr
% mlr --from infile.dat put 'tee > "./taps/data-".$a."-".$b, $*'
% mlr --from infile.dat put 'tee | "gzip > ./taps/data-".$a."-".$b.".gz", $*'
% mlr --from infile.dat put 'tee > "./taps/data-".$a."-".$b, $*'
% mlr --from infile.dat put 'tee | "gzip > ./taps/data-".$a."-".$b.".gz", $*'
% mlr --from infile.dat put -q '@v=$*; dump | "jq .[]"'
% mlr --from infile.dat put '(NR % 1000 == 0) { print > stderr, "Checkpoint ".NR}'
4 changes: 2 additions & 2 deletions doc/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -169,8 +169,8 @@
$mean = $sum / $count # no assignment if count unset
'
% mlr --from infile.dat put -f analyze.mlr
% mlr --from infile.dat put 'tee &gt; "./taps/data-".$a."-".$b, $*'
% mlr --from infile.dat put 'tee | "gzip &gt; ./taps/data-".$a."-".$b.".gz", $*'
% mlr --from infile.dat put 'tee &gt; "./taps/data-".$a."-".$b, $*'
% mlr --from infile.dat put 'tee | "gzip &gt; ./taps/data-".$a."-".$b.".gz", $*'
% mlr --from infile.dat put -q '@v=$*; dump | "jq .[]"'
% mlr --from infile.dat put '(NR % 1000 == 0) { print &gt; stderr, "Checkpoint ".NR}'
</pre>
Expand Down
Loading

0 comments on commit 5954234

Please sign in to comment.