From dd09eb2bb4329c9b14b15f61ef43eb07fb9252d6 Mon Sep 17 00:00:00 2001 From: Sean Mackesey Date: Thu, 29 Jul 2021 07:56:22 -0700 Subject: [PATCH 1/7] update dasht-query-line and dasht-query-html to work with tsv --- bin/dasht-query-html | 40 ++++++++++++++++++++----------------- bin/dasht-query-line | 37 ++++++++++++++++++---------------- man/man1/dasht-query-line.1 | 24 ++++++++++------------ 3 files changed, 53 insertions(+), 48 deletions(-) diff --git a/bin/dasht-query-html b/bin/dasht-query-html index 1f4356f..f0e5dbb 100755 --- a/bin/dasht-query-html +++ b/bin/dasht-query-html @@ -113,42 +113,46 @@ trap 'exit 44' USR1 # exit with a nonzero status when no results found if (pattern == "") pattern = "^." # grouped by leading character } NR == 1 { print "" } - $2 == "=" { result[$1] = substr($0, index($0, $2) + length($2) + 1) } - $1 == "from" { result["from"] = wordbreak_cached(result["from"], "") } - $1 == "name" { + { + + # $1, $2, $3, $4 :: name, docset, type, url + # mark search terms with STX and ETX bytes which are ignored by escape() if (pattern) { - gsub(pattern, "\002&\003", result["name"]) + gsub(pattern, "\002&\003", $1) } # mark word-wrappable points with VT bytes which are ignored by escape() - result["name"] = wordbreak(result["name"], "\v", "\002\003") + $1 = wordbreak($1, "\v", "\002\003") # escape XML entities in search result to make them visible in browsers - result["name"] = escape(result["name"]) + $1 = escape($1) # insert word-break opportunity tags at points marked by VT bytes - gsub("\v", "", result["name"]) + gsub("\v", "", $1) # highlight search terms in search result using the STX and ETX markers if (pattern) { - gsub("\002", "", result["name"]) - gsub("\003", "", result["name"]) + gsub("\002", "", $1) + gsub("\003", "", $1) } - } - $1 == "url" { print \ - ""\ - ""\ - ""\ - ""\ - "" + + $2 = wordbreak_cached($2, "") # docset field + + print \ + ""\ + ""\ + ""\ + ""\ + "" + } END { if (NR > 0) { print "
" result["name"] "" result["from"] "" tolower(result["type"]) "
" $1 "" $2 "" tolower($3) "
" - if (NR == 4) { + if (NR == 1) { # there was only one search result, so automatically visit its url - print "" + print "" } } } diff --git a/bin/dasht-query-line b/bin/dasht-query-line index 34dc78d..36210d5 100755 --- a/bin/dasht-query-line +++ b/bin/dasht-query-line @@ -4,7 +4,7 @@ # # ## NAME # -# dasht-query-line - searches [Dash] docsets and emits groups of lines +# dasht-query-line - searches [Dash] docsets and emits results as tsv # # ## SYNOPSIS # @@ -28,8 +28,8 @@ # # Searches for *PATTERN* in all installed [Dash] docsets, optionally searching # only in those whose names match *DOCSET*s, by calling dasht-query-exec(1) -# and emits the results in groups of lines, as described in "Results" below. -# However, if no results were found, this program exits with a nonzero status. +# and emits the results as TSV. However, if no results were found, this program +# exits with a nonzero status. # # ### Searching # @@ -42,26 +42,24 @@ # # ### Results # -# Each search result is printed to stdout as a group of four lines of text: +# Each search result is printed to stdout as a tab-separated line with fields: # -# `name` `=` *VALUE* +# `name` # Name of the token that matched the *PATTERN*. # -# `type` `=` *VALUE* +# `type` # Type of the token, as defined in the docset. # -# `from` `=` *VALUE* +# `from` # Name of the docset this result was found in. # -# `url` `=` *VALUE* +# `url` # URL of the API documentation for this result. 
# -# For example, here is a search result for "c - x" from the "bash" docset: +# For example, here is a search result for "c - x" from the "bash" docset, with +# tab characters represented by "": # -# name = undo (C-_ or C-x C-u) -# type = Function -# from = Bash -# url = file:///home/sunny/.local/share/dasht/docsets/Bash.docset/Contents/Resources/Documents/bash/Miscellaneous-Commands.html#//apple_ref/Function/undo%20%28C%2D%5F%20or%20C%2Dx%20C%2Du%29 +# undo (C-_ or C-x C-u)FunctionBashfile:///home/sunny/.local/share/dasht/docsets/Bash.docset/Contents/Resources/Documents/bash/Miscellaneous-Commands.html#//apple_ref/Function/undo%20%28C%2D%5F%20or%20C%2Dx%20C%2Du%29 # # ## ENVIRONMENT # @@ -245,9 +243,11 @@ dasht-docsets "$@" | while read -r docset; do { $1 = $1 } # strip whitespace from key + $2 == "=" { + result[$1] = substr($0, index($0, $2) + length($2) + 1) + } + $1 == "url" { were_any_results_found=1 - # indicate the source of this result - print "from = " docset # strip embedded XML from result URL gsub("<.*>", "", $3) @@ -259,9 +259,12 @@ dasht-docsets "$@" | while read -r docset; do # resolve URL to filesystem location $3 = file_url $3 - } - /./ # reject any empty lines from input + # print TSV line + printf "%s\t%s\t%s\t%s\n", result["name"], docset, \ + result["type"], $3 + + } END { exit !were_any_results_found } ' && kill -s USR1 $$ || : # notify this script if any results were found diff --git a/man/man1/dasht-query-line.1 b/man/man1/dasht-query-line.1 index c511843..f6a988d 100644 --- a/man/man1/dasht-query-line.1 +++ b/man/man1/dasht-query-line.1 @@ -1,7 +1,7 @@ .TH DASHT\-QUERY\-LINE 1 2020\-05\-16 2.4.0 .SH NAME .PP -dasht\-query\-line \- searches Dash \[la]https://kapeli.com/dash\[ra] docsets and emits groups of lines +dasht\-query\-line \- searches Dash \[la]https://kapeli.com/dash\[ra] docsets and emits groups of lines as tsv .SH SYNOPSIS .PP \fB\fCdasht\-query\-line\fR [\fIPATTERN\fP] [\fIDOCSET\fP]... 
@@ -24,8 +24,8 @@ Searches for \fIPATTERN\fP in all installed Dash \[la]https://kapeli.com/dash\[r only in those whose names match \fIDOCSET\fPs, by calling .BR dasht-query-exec (1) -and emits the results in groups of lines, as described in "Results" below. -However, if no results were found, this program exits with a nonzero status. +and emits the results as TSV. However, if no results were found, this program +exits with a nonzero status. .SS Searching .PP Whitespace characters in \fIPATTERN\fP are treated as wildcards, whereas the @@ -36,28 +36,26 @@ can match anywhere: beginning, middle, or end. As a result, if \fIPATTERN\fP is undefined, it becomes a whitespace wildcard and thereby matches everything. .SS Results .PP -Each search result is printed to stdout as a group of four lines of text: +Each search result is printed to stdout as a tab\-separated line with fields: .TP -\fB\fCname\fR \fB\fC=\fR \fIVALUE\fP +\fB\fCname\fR Name of the token that matched the \fIPATTERN\fP\&. .TP -\fB\fCtype\fR \fB\fC=\fR \fIVALUE\fP +\fB\fCtype\fR Type of the token, as defined in the docset. .TP -\fB\fCfrom\fR \fB\fC=\fR \fIVALUE\fP +\fB\fCfrom\fR Name of the docset this result was found in. .TP -\fB\fCurl\fR \fB\fC=\fR \fIVALUE\fP +\fB\fCurl\fR URL of the API documentation for this result. 
.PP -For example, here is a search result for "c \- x" from the "bash" docset: +For example, here is a search result for "c \- x" from the "bash" docset, with +tab characters represented by "": .PP .RS .nf -name = undo (C\-_ or C\-x C\-u) -type = Function -from = Bash -url = file:///home/sunny/.local/share/dasht/docsets/Bash.docset/Contents/Resources/Documents/bash/Miscellaneous\-Commands.html#//apple_ref/Function/undo%20%28C%2D%5F%20or%20C%2Dx%20C%2Du%29 +undo (C\-_ or C\-x C\-u)FunctionBashfile:///home/sunny/.local/share/dasht/docsets/Bash.docset/Contents/Resources/Documents/bash/Miscellaneous\-Commands.html#//apple_ref/Function/undo%20%28C%2D%5F%20or%20C%2Dx%20C%2Du%29 .fi .RE .SH ENVIRONMENT From 3ecd91b1195b4fbc8ef51f09059647aef69f5926 Mon Sep 17 00:00:00 2001 From: Sean Mackesey Date: Thu, 5 Aug 2021 08:44:55 -0700 Subject: [PATCH 2/7] dasht-query-line doc and style fixes --- bin/dasht-query-line | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/bin/dasht-query-line b/bin/dasht-query-line index 36210d5..6101e25 100755 --- a/bin/dasht-query-line +++ b/bin/dasht-query-line @@ -4,7 +4,7 @@ # # ## NAME # -# dasht-query-line - searches [Dash] docsets and emits results as tsv +# dasht-query-line - searches [Dash] docsets and emits results as TSV # # ## SYNOPSIS # @@ -28,8 +28,8 @@ # # Searches for *PATTERN* in all installed [Dash] docsets, optionally searching # only in those whose names match *DOCSET*s, by calling dasht-query-exec(1) -# and emits the results as TSV. However, if no results were found, this program -# exits with a nonzero status. +# and emits the results in Tab Separated Values (TSV) format. However, if no +# results were found, this program exits with a nonzero status. 
# # ### Searching # @@ -42,7 +42,8 @@ # # ### Results # -# Each search result is printed to stdout as a tab-separated line with fields: +# Each search result is printed to stdout as a line with 4 tab-separated +# fields: # # `name` # Name of the token that matched the *PATTERN*. @@ -244,7 +245,7 @@ dasht-docsets "$@" | while read -r docset; do { $1 = $1 } # strip whitespace from key $2 == "=" { - result[$1] = substr($0, index($0, $2) + length($2) + 1) + result[$1] = substr($0, length($1) + 1 + length($2) + 1) } $1 == "url" { were_any_results_found=1 @@ -260,9 +261,13 @@ dasht-docsets "$@" | while read -r docset; do # resolve URL to filesystem location $3 = file_url $3 - # print TSV line - printf "%s\t%s\t%s\t%s\n", result["name"], docset, \ - result["type"], $3 + printf( \ + "%s\t%s\t%s\t%s\n", \ + result["name"], \ + docset, \ + result["type"], \ + $3 \ + ) } From 3be5280aec88b5edca7aa268ee16d3d382114141 Mon Sep 17 00:00:00 2001 From: Sean Mackesey Date: Thu, 26 Aug 2021 08:11:44 -0700 Subject: [PATCH 3/7] cosmetic update --- bin/dasht-query-html | 24 ++++++++++++------------ bin/dasht-query-line | 8 +------- 2 files changed, 13 insertions(+), 19 deletions(-) diff --git a/bin/dasht-query-html b/bin/dasht-query-html index f0e5dbb..9c832cb 100755 --- a/bin/dasht-query-html +++ b/bin/dasht-query-html @@ -115,35 +115,35 @@ trap 'exit 44' USR1 # exit with a nonzero status when no results found NR == 1 { print "" } { - # $1, $2, $3, $4 :: name, docset, type, url + name = $1; docset = $2; type = $3; url = $4; # mark search terms with STX and ETX bytes which are ignored by escape() if (pattern) { - gsub(pattern, "\002&\003", $1) + gsub(pattern, "\002&\003", $name) } # mark word-wrappable points with VT bytes which are ignored by escape() - $1 = wordbreak($1, "\v", "\002\003") + $name = wordbreak($name, "\v", "\002\003") # escape XML entities in search result to make them visible in browsers - $1 = escape($1) + $name = escape($name) # insert word-break opportunity tags at 
points marked by VT bytes - gsub("\v", "", $1) + gsub("\v", "", $name) # highlight search terms in search result using the STX and ETX markers if (pattern) { - gsub("\002", "", $1) - gsub("\003", "", $1) + gsub("\002", "", $name) + gsub("\003", "", $name) } - $2 = wordbreak_cached($2, "") # docset field + $docset = wordbreak_cached($docset, "") # docset field print \ ""\ - ""\ - ""\ - ""\ + ""\ + ""\ + ""\ "" } @@ -152,7 +152,7 @@ trap 'exit 44' USR1 # exit with a nonzero status when no results found print "
" $1 "" $2 "" tolower($3) "" $name "" $docset "" tolower($type) "
" if (NR == 1) { # there was only one search result, so automatically visit its url - print "" + print "" } } } diff --git a/bin/dasht-query-line b/bin/dasht-query-line index 6101e25..6ca25cf 100755 --- a/bin/dasht-query-line +++ b/bin/dasht-query-line @@ -261,13 +261,7 @@ dasht-docsets "$@" | while read -r docset; do # resolve URL to filesystem location $3 = file_url $3 - printf( \ - "%s\t%s\t%s\t%s\n", \ - result["name"], \ - docset, \ - result["type"], \ - $3 \ - ) + printf("%s\t%s\t%s\t%s\n", result["name"], docset, result["type"], $3) } From f37f12d392f27837e6a499b0009454752fdd1b4b Mon Sep 17 00:00:00 2001 From: "Suraj N. Kurapati" Date: Wed, 25 Aug 2021 22:59:14 -0700 Subject: [PATCH 4/7] beautify and clean up documentation --- bin/dasht-query-line | 11 +++++------ man/man1/dasht-query-line.1 | 12 ++++++------ 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/bin/dasht-query-line b/bin/dasht-query-line index 6ca25cf..a000613 100755 --- a/bin/dasht-query-line +++ b/bin/dasht-query-line @@ -28,8 +28,8 @@ # # Searches for *PATTERN* in all installed [Dash] docsets, optionally searching # only in those whose names match *DOCSET*s, by calling dasht-query-exec(1) -# and emits the results in Tab Separated Values (TSV) format. However, if no -# results were found, this program exits with a nonzero status. +# and emits the results, one per line, in Tab-Separated Values (TSV) format. +# However, if no results were found, this program exits with a nonzero status. # # ### Searching # @@ -42,8 +42,7 @@ # # ### Results # -# Each search result is printed to stdout as a line with 4 tab-separated -# fields: +# Each search result is printed stdout as a line with 4 tab-separated fields: # # `name` # Name of the token that matched the *PATTERN*. @@ -57,8 +56,8 @@ # `url` # URL of the API documentation for this result. 
# # For example, here is a search result for "c - x" from the "bash" docset, with -# tab characters represented by "": +# For example, here is a search result for "c - x" from the "bash" docset, +# with the tab separators represented by `` for illustrative purposes: # # undo (C-_ or C-x C-u)FunctionBashfile:///home/sunny/.local/share/dasht/docsets/Bash.docset/Contents/Resources/Documents/bash/Miscellaneous-Commands.html#//apple_ref/Function/undo%20%28C%2D%5F%20or%20C%2Dx%20C%2Du%29 # diff --git a/man/man1/dasht-query-line.1 b/man/man1/dasht-query-line.1 index f6a988d..63eb6dd 100644 --- a/man/man1/dasht-query-line.1 +++ b/man/man1/dasht-query-line.1 @@ -1,7 +1,7 @@ .TH DASHT\-QUERY\-LINE 1 2020\-05\-16 2.4.0 .SH NAME .PP -dasht\-query\-line \- searches Dash \[la]https://kapeli.com/dash\[ra] docsets and emits groups of lines as tsv +dasht\-query\-line \- searches Dash \[la]https://kapeli.com/dash\[ra] docsets and emits results as TSV .SH SYNOPSIS .PP \fB\fCdasht\-query\-line\fR [\fIPATTERN\fP] [\fIDOCSET\fP]... @@ -24,8 +24,8 @@ Searches for \fIPATTERN\fP in all installed Dash \[la]https://kapeli.com/dash\[r only in those whose names match \fIDOCSET\fPs, by calling .BR dasht-query-exec (1) -and emits the results as TSV. However, if no results were found, this program -exits with a nonzero status. +and emits the results, one per line, in Tab\-Separated Values (TSV) format. +However, if no results were found, this program exits with a nonzero status. .SS Searching .PP Whitespace characters in \fIPATTERN\fP are treated as wildcards, whereas the @@ -36,7 +36,7 @@ can match anywhere: beginning, middle, or end. As a result, if \fIPATTERN\fP is undefined, it becomes a whitespace wildcard and thereby matches everything. .SS Results .PP -Each search result is printed to stdout as a tab\-separated line with fields: +Each search result is printed to stdout as a line with 4 tab\-separated fields: .TP \fB\fCname\fR Name of the token that matched the \fIPATTERN\fP\&. 
@@ -50,8 +50,8 @@ Name of the docset this result was found in. \fB\fCurl\fR URL of the API documentation for this result. .PP -For example, here is a search result for "c \- x" from the "bash" docset, with -tab characters represented by "": +For example, here is a search result for "c \- x" from the "bash" docset, +with the tab separators represented by \fB\fC\fR for illustrative purposes: .PP .RS .nf From c57d668f2a65e9547186bc86c1ab17ba511cf132 Mon Sep 17 00:00:00 2001 From: "Suraj N. Kurapati" Date: Thu, 26 Aug 2021 22:58:17 -0700 Subject: [PATCH 5/7] dasht-query-line: AWK substr() uses 1-based index --- bin/dasht-query-line | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bin/dasht-query-line b/bin/dasht-query-line index a000613..2a29238 100755 --- a/bin/dasht-query-line +++ b/bin/dasht-query-line @@ -244,7 +244,8 @@ dasht-docsets "$@" | while read -r docset; do { $1 = $1 } # strip whitespace from key $2 == "=" { - result[$1] = substr($0, length($1) + 1 + length($2) + 1) + # skip over the first 2 fields and grab the rest of the line + result[$1] = substr($0, 1 + length($1) + 1 + length($2) + 1) } $1 == "url" { were_any_results_found=1 From d3855296122ec99bf66b72d2d8ddbd9861fdf35b Mon Sep 17 00:00:00 2001 From: Sean Mackesey Date: Fri, 27 Aug 2021 06:36:23 -0700 Subject: [PATCH 6/7] fix awk variable naming --- bin/dasht-query-html | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/bin/dasht-query-html b/bin/dasht-query-html index 9c832cb..574f7f4 100755 --- a/bin/dasht-query-html +++ b/bin/dasht-query-html @@ -119,31 +119,31 @@ trap 'exit 44' USR1 # exit with a nonzero status when no results found # mark search terms with STX and ETX bytes which are ignored by escape() if (pattern) { - gsub(pattern, "\002&\003", $name) + gsub(pattern, "\002&\003", name) } # mark word-wrappable points with VT bytes which are ignored by escape() - $name = wordbreak($name, "\v", "\002\003") + name = wordbreak(name, "\v", 
"\002\003") # escape XML entities in search result to make them visible in browsers - $name = escape($name) + name = escape(name) # insert word-break opportunity tags at points marked by VT bytes - gsub("\v", "", $name) + gsub("\v", "", name) # highlight search terms in search result using the STX and ETX markers if (pattern) { - gsub("\002", "", $name) - gsub("\003", "", $name) + gsub("\002", "", name) + gsub("\003", "", name) } - $docset = wordbreak_cached($docset, "") # docset field + docset = wordbreak_cached(docset, "") # docset field print \ ""\ - "" $name ""\ - "" $docset ""\ - "" tolower($type) ""\ + "" name ""\ + "" docset ""\ + "" tolower(type) ""\ "" } @@ -152,7 +152,7 @@ trap 'exit 44' USR1 # exit with a nonzero status when no results found print "" if (NR == 1) { # there was only one search result, so automatically visit its url - print "" + print "" } } } From 6c1e309341d7031cecab2589af554ac361030fc0 Mon Sep 17 00:00:00 2001 From: Sean Mackesey Date: Mon, 30 Aug 2021 09:01:10 -0700 Subject: [PATCH 7/7] fix field separator for dasht-query-html --- bin/dasht-query-html | 1 + 1 file changed, 1 insertion(+) diff --git a/bin/dasht-query-html b/bin/dasht-query-html index 574f7f4..ea34eaf 100755 --- a/bin/dasht-query-html +++ b/bin/dasht-query-html @@ -111,6 +111,7 @@ trap 'exit 44' USR1 # exit with a nonzero status when no results found gsub("[[:space:]]+", ".*", pattern) # treat whitespace as wildcards pattern = ignorecase(pattern) # emulate IGNORECASE=1 for POSIX if (pattern == "") pattern = "^." # grouped by leading character + FS = "\t" # split lines with tab } NR == 1 { print "" } {