diff --git a/docs/src/customization.md b/docs/src/customization.md index 5a787ad4fc..cbc69928f3 100644 --- a/docs/src/customization.md +++ b/docs/src/customization.md @@ -50,7 +50,7 @@ and the `--csv` part will automatically be understood. If you do want to process * You can include any command-line flags, except the "terminal" ones such as `--help`. -* The `--prepipe`, `--load`, and `--mload` flags aren't allowed in `.mlrrc` as they control code execution, and could result in your scripts running things you don't expect if you receive data from someone with a `./.mlrrc` in it. You can use `--prepipe-bz2`, `--prepipe-gunzip`, and `--prepipe-zcat` in `.mlrrc`, though. +* The `--prepipe`, `--load`, and `--mload` flags aren't allowed in `.mlrrc` as they control code execution, and could result in your scripts running things you don't expect if you receive data from someone with a `./.mlrrc` in it. You can use `--prepipe-bz2`, `--prepipe-gunzip`, `--prepipe-zcat`, and `--prepipe-zstdcat` in `.mlrrc`, though. * The formatting rule is you need to put one flag beginning with `--` per line: for example, `--csv` on one line and `--nr-progress-mod 1000` on a separate line. diff --git a/docs/src/customization.md.in b/docs/src/customization.md.in index 9a1d2894b7..00367b2f76 100644 --- a/docs/src/customization.md.in +++ b/docs/src/customization.md.in @@ -34,7 +34,7 @@ and the `--csv` part will automatically be understood. If you do want to process * You can include any command-line flags, except the "terminal" ones such as `--help`. -* The `--prepipe`, `--load`, and `--mload` flags aren't allowed in `.mlrrc` as they control code execution, and could result in your scripts running things you don't expect if you receive data from someone with a `./.mlrrc` in it. You can use `--prepipe-bz2`, `--prepipe-gunzip`, and `--prepipe-zcat` in `.mlrrc`, though. 
+* The `--prepipe`, `--load`, and `--mload` flags aren't allowed in `.mlrrc` as they control code execution, and could result in your scripts running things you don't expect if you receive data from someone with a `./.mlrrc` in it. You can use `--prepipe-bz2`, `--prepipe-gunzip`, `--prepipe-zcat`, and `--prepipe-zstdcat` in `.mlrrc`, though. * The formatting rule is you need to put one flag beginning with `--` per line: for example, `--csv` on one line and `--nr-progress-mod 1000` on a separate line. diff --git a/docs/src/data-diving-examples.md b/docs/src/data-diving-examples.md index 39738f193d..100716ec26 100644 --- a/docs/src/data-diving-examples.md +++ b/docs/src/data-diving-examples.md @@ -160,11 +160,11 @@ CITRUS COUNTY 1332.9 79974.9 483785.1 stats2 -a corr,linreg-ols,r2 -f tiv_2011,tiv_2012
-tiv_2011_tiv_2012_corr  0.9730497632351692
-tiv_2011_tiv_2012_ols_m 0.9835583980337723
-tiv_2011_tiv_2012_ols_b 433854.6428968317
+tiv_2011_tiv_2012_corr  0.9730497632351701
+tiv_2011_tiv_2012_ols_m 0.9835583980337732
+tiv_2011_tiv_2012_ols_b 433854.6428968301
 tiv_2011_tiv_2012_ols_n 36634
-tiv_2011_tiv_2012_r2    0.9468258417320189
+tiv_2011_tiv_2012_r2    0.9468258417320204
 
@@ -322,7 +322,7 @@ Look at bivariate stats by color and shape. In particular, `u,v` pairwise correl
 
           u_v_corr              w_x_corr
-0.1334180491027861 -0.011319841199866178
+0.1334180491027861 -0.011319841199852926
 
@@ -332,22 +332,22 @@ Look at bivariate stats by color and shape. In particular, `u,v` pairwise correl
 
  color    shape              u_v_corr               w_x_corr
-   red   circle    0.9807984401887236   -0.01856553658708754
-orange   square   0.17685855992752927   -0.07104431573806054
- green   circle   0.05764419437577255    0.01179572988801509
-   red   square   0.05574477124893523 -0.0006801456507510942
-yellow triangle   0.04457273771962798   0.024604310103081825
-yellow   square   0.04379172927296089   -0.04462197201631237
-purple   circle   0.03587354936895086     0.1341133954140899
-  blue   square   0.03241153095761164  -0.053507648119643196
-  blue triangle  0.015356427073158766 -0.0006089997461435399
-orange   circle  0.010518953877704048   -0.16279397329279383
-   red triangle   0.00809782571528034   0.012486621357942596
-purple triangle  0.005155190909099334  -0.045057909256220656
-purple   square -0.025680276963377404    0.05769429647930396
- green   square   -0.0257760734502851  -0.003265173252087127
-orange triangle -0.030456661186085785    -0.1318699981926352
-yellow   circle  -0.06477331572781474    0.07369449819706045
-  blue   circle  -0.10234761901929677  -0.030528539069837757
- green triangle  -0.10901825107358765   -0.04848782060162929
+   red   circle    0.9807984401887242  -0.018565536587084836
+orange   square   0.17685855992752933   -0.07104431573805543
+ green   circle   0.05764419437577257   0.011795729888018455
+   red   square    0.0557447712489348 -0.0006801456507506415
+yellow triangle    0.0445727377196281   0.024604310103079844
+yellow   square    0.0437917292729612  -0.044621972016306265
+purple   circle   0.03587354936895115    0.13411339541407613
+  blue   square   0.03241153095761152   -0.05350764811965621
+  blue triangle  0.015356427073158612 -0.0006089997461408209
+orange   circle  0.010518953877704181    -0.1627939732927932
+   red triangle   0.00809782571528054    0.01248662135795501
+purple triangle  0.005155190909099739   -0.04505790925621933
+purple   square  -0.02568027696337717   0.057694296479293694
+ green   square -0.025776073450284875 -0.0032651732520739014
+orange triangle -0.030456661186085584   -0.13186999819263814
+yellow   circle  -0.06477331572781515     0.0736944981970553
+  blue   circle   -0.1023476190192966  -0.030528539069839333
+ green triangle  -0.10901825107358747   -0.04848782060162855
 
diff --git a/docs/src/example-mlr-s-script b/docs/src/example-mlr-s-script index 7b9cdb972c..50e3f5db45 100755 --- a/docs/src/example-mlr-s-script +++ b/docs/src/example-mlr-s-script @@ -1,5 +1,5 @@ #!/usr/bin/env mlr -s --c2p -filter '$quantity != 20' +filter '$quantity != 20' # Here is a comment then count-distinct -f shape then fraction -f count diff --git a/docs/src/glossary.md b/docs/src/glossary.md index bb731297b7..774975c41e 100644 --- a/docs/src/glossary.md +++ b/docs/src/glossary.md @@ -905,3 +905,8 @@ See also the [arrays page](reference-main-arrays.md), as well as the page on A [data-compression format supported by Miller](reference-main-compressed-data.md). Files compressed using ZLIB compression normally end in `.z`. + +## ZSTD / .zst + +A [data-compression format supported by Miller](reference-main-compressed-data.md). +Files compressed using ZSTD compression normally end in `.zst`. diff --git a/docs/src/glossary.md.in b/docs/src/glossary.md.in index 7e03b7d11b..b8eb8f4177 100644 --- a/docs/src/glossary.md.in +++ b/docs/src/glossary.md.in @@ -889,3 +889,8 @@ See also the [arrays page](reference-main-arrays.md), as well as the page on A [data-compression format supported by Miller](reference-main-compressed-data.md). Files compressed using ZLIB compression normally end in `.z`. + +## ZSTD / .zst + +A [data-compression format supported by Miller](reference-main-compressed-data.md). +Files compressed using ZSTD compression normally end in `.zst`. 
diff --git a/docs/src/manpage.md b/docs/src/manpage.md index 08ffd80225..d801934335 100644 --- a/docs/src/manpage.md +++ b/docs/src/manpage.md @@ -194,12 +194,13 @@ MILLER(1) MILLER(1) 1mVERB LIST0m altkv bar bootstrap case cat check clean-whitespace count-distinct count count-similar cut decimate fill-down fill-empty filter flatten format-values - fraction gap grep group-by group-like having-fields head histogram json-parse - json-stringify join label latin1-to-utf8 least-frequent merge-fields - most-frequent nest nothing put regularize remove-empty-columns rename reorder - repeat reshape sample sec2gmtdate sec2gmt seqgen shuffle skip-trivial-records - sort sort-within-records split stats1 stats2 step summary tac tail tee - template top utf8-to-latin1 unflatten uniq unspace unsparsify + fraction gap grep group-by group-like gsub having-fields head histogram + json-parse json-stringify join label latin1-to-utf8 least-frequent + merge-fields most-frequent nest nothing put regularize remove-empty-columns + rename reorder repeat reshape sample sec2gmtdate sec2gmt seqgen shuffle + skip-trivial-records sort sort-within-records split ssub stats1 stats2 step + sub summary tac tail tee template top utf8-to-latin1 unflatten uniq unspace + unsparsify 1mFUNCTION LIST0m abs acos acosh any append apply arrayify asin asinh asserting_absent @@ -262,7 +263,7 @@ MILLER(1) MILLER(1) Miller offers a few different ways to handle reading data files which have been compressed. - * Decompression done within the Miller process itself: `--bz2in` `--gzin` `--zin` + * Decompression done within the Miller process itself: `--bz2in` `--gzin` `--zin``--zstdin` * Decompression done outside the Miller process: `--prepipe` `--prepipex` Using `--prepipe` and `--prepipex` you can specify an action to be @@ -285,7 +286,7 @@ MILLER(1) MILLER(1) Lastly, note that if `--prepipe` or `--prepipex` is specified, it replaces any decisions that might have been made based on the file suffix. 
Likewise, - `--gzin`/`--bz2in`/`--zin` are ignored if `--prepipe` is also specified. + `--gzin`/`--bz2in`/`--zin``--zin` are ignored if `--prepipe` is also specified. --bz2in Uncompress bzip2 within the Miller process. Done by default if file ends in `.bz2`. @@ -302,6 +303,8 @@ MILLER(1) MILLER(1) `.mlrrc`. --prepipe-zcat Same as `--prepipe zcat`, except this is allowed in `.mlrrc`. + --prepipe-zstdcat Same as `--prepipe zstdcat`, except this is allowed + in `.mlrrc`. --prepipex {decompression command} Like `--prepipe` with one exception: doesn't insert `<` between command and filename at runtime. Useful @@ -310,6 +313,8 @@ MILLER(1) MILLER(1) in `.mlrrc` to avoid unexpected code execution. --zin Uncompress zlib within the Miller process. Done by default if file ends in `.z`. + --zstdin Uncompress zstd within the Miller process. Done by + default if file ends in `.zstd`. 1mCSV/TSV-ONLY FLAGS0m These are flags which are applicable to CSV format. @@ -572,6 +577,11 @@ MILLER(1) MILLER(1) to be modified, except when input is from `tail -f`. See also https://miller.readthedocs.io/en/latest/reference-main-flag-list/. + --s-no-comment-strip {file name} + Take command-line flags from file name, like -s, but + with no comment-stripping. For more information + please see + https://miller.readthedocs.io/en/latest/scripting/. --seed {n} with `n` of the form `12345678` or `0xcafefeed`. For `put`/`filter` `urand`, `urandint`, and `urand32`. --tz {timezone} Specify timezone, overriding `$TZ` environment @@ -1236,6 +1246,15 @@ MILLER(1) MILLER(1) Options: -h|--help Show this message. + 1mgsub0m + Usage: mlr gsub [options] + Replaces old string with new string in specified field(s), with regex support + for the old string and handling multiple matches, like the `gsub` DSL function. + See also the `sub` and `ssub` verbs. + Options: + -f {a,b,c} Field names to convert. + -h|--help Show this message. 
+ 1mhaving-fields0m Usage: mlr having-fields [options] Conditionally passes through records depending on each record's field names. @@ -1844,6 +1863,14 @@ MILLER(1) MILLER(1) See also the "tee" DSL function which lets you do more ad-hoc customization. + 1mssub0m + Usage: mlr ssub [options] + Replaces old string with new string in specified field(s), without regex support for + the old string, like the `ssub` DSL function. See also the `gsub` and `sub` verbs. + Options: + -f {a,b,c} Field names to convert. + -h|--help Show this message. + 1mstats10m Usage: mlr stats1 [options] Computes univariate statistics for one or more given fields, accumulated across @@ -1981,6 +2008,15 @@ MILLER(1) MILLER(1) https://en.wikipedia.org/wiki/Moving_average#Exponential_moving_average for more information on EWMA. + 1msub0m + Usage: mlr sub [options] + Replaces old string with new string in specified field(s), with regex support + for the old string and not handling multiple matches, like the `sub` DSL function. + See also the `gsub` and `ssub` verbs. + Options: + -f {a,b,c} Field names to convert. + -h|--help Show this message. + 1msummary0m Usage: mlr summary [options] Show summary statistics about the input data. 
diff --git a/docs/src/manpage.txt b/docs/src/manpage.txt index 58ff3991fd..0c04fc330e 100644 --- a/docs/src/manpage.txt +++ b/docs/src/manpage.txt @@ -173,12 +173,13 @@ MILLER(1) MILLER(1) 1mVERB LIST0m altkv bar bootstrap case cat check clean-whitespace count-distinct count count-similar cut decimate fill-down fill-empty filter flatten format-values - fraction gap grep group-by group-like having-fields head histogram json-parse - json-stringify join label latin1-to-utf8 least-frequent merge-fields - most-frequent nest nothing put regularize remove-empty-columns rename reorder - repeat reshape sample sec2gmtdate sec2gmt seqgen shuffle skip-trivial-records - sort sort-within-records split stats1 stats2 step summary tac tail tee - template top utf8-to-latin1 unflatten uniq unspace unsparsify + fraction gap grep group-by group-like gsub having-fields head histogram + json-parse json-stringify join label latin1-to-utf8 least-frequent + merge-fields most-frequent nest nothing put regularize remove-empty-columns + rename reorder repeat reshape sample sec2gmtdate sec2gmt seqgen shuffle + skip-trivial-records sort sort-within-records split ssub stats1 stats2 step + sub summary tac tail tee template top utf8-to-latin1 unflatten uniq unspace + unsparsify 1mFUNCTION LIST0m abs acos acosh any append apply arrayify asin asinh asserting_absent @@ -241,7 +242,7 @@ MILLER(1) MILLER(1) Miller offers a few different ways to handle reading data files which have been compressed. - * Decompression done within the Miller process itself: `--bz2in` `--gzin` `--zin` + * Decompression done within the Miller process itself: `--bz2in` `--gzin` `--zin``--zstdin` * Decompression done outside the Miller process: `--prepipe` `--prepipex` Using `--prepipe` and `--prepipex` you can specify an action to be @@ -264,7 +265,7 @@ MILLER(1) MILLER(1) Lastly, note that if `--prepipe` or `--prepipex` is specified, it replaces any decisions that might have been made based on the file suffix. 
Likewise, - `--gzin`/`--bz2in`/`--zin` are ignored if `--prepipe` is also specified. + `--gzin`/`--bz2in`/`--zin``--zin` are ignored if `--prepipe` is also specified. --bz2in Uncompress bzip2 within the Miller process. Done by default if file ends in `.bz2`. @@ -281,6 +282,8 @@ MILLER(1) MILLER(1) `.mlrrc`. --prepipe-zcat Same as `--prepipe zcat`, except this is allowed in `.mlrrc`. + --prepipe-zstdcat Same as `--prepipe zstdcat`, except this is allowed + in `.mlrrc`. --prepipex {decompression command} Like `--prepipe` with one exception: doesn't insert `<` between command and filename at runtime. Useful @@ -289,6 +292,8 @@ MILLER(1) MILLER(1) in `.mlrrc` to avoid unexpected code execution. --zin Uncompress zlib within the Miller process. Done by default if file ends in `.z`. + --zstdin Uncompress zstd within the Miller process. Done by + default if file ends in `.zstd`. 1mCSV/TSV-ONLY FLAGS0m These are flags which are applicable to CSV format. @@ -551,6 +556,11 @@ MILLER(1) MILLER(1) to be modified, except when input is from `tail -f`. See also https://miller.readthedocs.io/en/latest/reference-main-flag-list/. + --s-no-comment-strip {file name} + Take command-line flags from file name, like -s, but + with no comment-stripping. For more information + please see + https://miller.readthedocs.io/en/latest/scripting/. --seed {n} with `n` of the form `12345678` or `0xcafefeed`. For `put`/`filter` `urand`, `urandint`, and `urand32`. --tz {timezone} Specify timezone, overriding `$TZ` environment @@ -1215,6 +1225,15 @@ MILLER(1) MILLER(1) Options: -h|--help Show this message. + 1mgsub0m + Usage: mlr gsub [options] + Replaces old string with new string in specified field(s), with regex support + for the old string and handling multiple matches, like the `gsub` DSL function. + See also the `sub` and `ssub` verbs. + Options: + -f {a,b,c} Field names to convert. + -h|--help Show this message. 
+ 1mhaving-fields0m Usage: mlr having-fields [options] Conditionally passes through records depending on each record's field names. @@ -1823,6 +1842,14 @@ MILLER(1) MILLER(1) See also the "tee" DSL function which lets you do more ad-hoc customization. + 1mssub0m + Usage: mlr ssub [options] + Replaces old string with new string in specified field(s), without regex support for + the old string, like the `ssub` DSL function. See also the `gsub` and `sub` verbs. + Options: + -f {a,b,c} Field names to convert. + -h|--help Show this message. + 1mstats10m Usage: mlr stats1 [options] Computes univariate statistics for one or more given fields, accumulated across @@ -1960,6 +1987,15 @@ MILLER(1) MILLER(1) https://en.wikipedia.org/wiki/Moving_average#Exponential_moving_average for more information on EWMA. + 1msub0m + Usage: mlr sub [options] + Replaces old string with new string in specified field(s), with regex support + for the old string and not handling multiple matches, like the `sub` DSL function. + See also the `gsub` and `ssub` verbs. + Options: + -f {a,b,c} Field names to convert. + -h|--help Show this message. + 1msummary0m Usage: mlr summary [options] Show summary statistics about the input data. diff --git a/docs/src/new-in-miller-6.md b/docs/src/new-in-miller-6.md index 3170819c9d..32633b6f8e 100644 --- a/docs/src/new-in-miller-6.md +++ b/docs/src/new-in-miller-6.md @@ -143,7 +143,7 @@ the `TZ` environment variable. Please see [DSL datetime/timezone functions](refe ### In-process support for compressed input -In addition to `--prepipe gunzip`, you can now use the `--gzin` flag. In fact, if your files end in `.gz` you don't even need to do that -- Miller will autodetect by file extension and automatically uncompress `mlr --csv cat foo.csv.gz`. Similarly for `.z` and `.bz2` files. Please see the page on [Compressed data](reference-main-compressed-data.md) for more information. +In addition to `--prepipe gunzip`, you can now use the `--gzin` flag. 
In fact, if your files end in `.gz` you don't even need to do that -- Miller will autodetect by file extension and automatically uncompress `mlr --csv cat foo.csv.gz`. Similarly for `.z`, `.bz2`, and `.zst` files. Please see the page on [Compressed data](reference-main-compressed-data.md) for more information. ### Support for reading web URLs diff --git a/docs/src/new-in-miller-6.md.in b/docs/src/new-in-miller-6.md.in index 43ea44d905..c450a96224 100644 --- a/docs/src/new-in-miller-6.md.in +++ b/docs/src/new-in-miller-6.md.in @@ -125,7 +125,7 @@ the `TZ` environment variable. Please see [DSL datetime/timezone functions](refe ### In-process support for compressed input -In addition to `--prepipe gunzip`, you can now use the `--gzin` flag. In fact, if your files end in `.gz` you don't even need to do that -- Miller will autodetect by file extension and automatically uncompress `mlr --csv cat foo.csv.gz`. Similarly for `.z` and `.bz2` files. Please see the page on [Compressed data](reference-main-compressed-data.md) for more information. +In addition to `--prepipe gunzip`, you can now use the `--gzin` flag. In fact, if your files end in `.gz` you don't even need to do that -- Miller will autodetect by file extension and automatically uncompress `mlr --csv cat foo.csv.gz`. Similarly for `.z`, `.bz2`, and `.zst` files. Please see the page on [Compressed data](reference-main-compressed-data.md) for more information. ### Support for reading web URLs diff --git a/docs/src/reference-main-compressed-data.md b/docs/src/reference-main-compressed-data.md index a54ed8026b..729cf5bbcc 100644 --- a/docs/src/reference-main-compressed-data.md +++ b/docs/src/reference-main-compressed-data.md @@ -16,13 +16,13 @@ Quick links: # Compressed data -As of [Miller 6](new-in-miller-6.md), Miller supports reading GZIP, BZIP2, and -ZLIB formats transparently, and in-process. 
And (as before Miller 6) you have a +As of [Miller 6](new-in-miller-6.md), Miller supports reading GZIP, BZIP2, ZLIB, and +ZSTD formats transparently, and in-process. And (as before Miller 6) you have a more general `--prepipe` option to support other decompression programs. ## Automatic detection on input -If your files end in `.gz`, `.bz2`, or `.z` then Miller will autodetect by file extension: +If your files end in `.gz`, `.bz2`, `.z`, or `.zst` then Miller will autodetect by file extension:
 file gz-example.csv.gz
@@ -52,7 +52,7 @@ This will decompress the input data on the fly, while leaving the disk file unmo
 
 ## Manual detection on input
 
-If the filename doesn't in in `.gz`, `.bz2`, or `.z` then you can use the flags `--gzin`, `--bz2in`, or `--zin` to let Miller know:
+If the filename doesn't end in `.gz`, `.bz2`, `.z`, or `.zst` then you can use the flags `--gzin`, `--bz2in`, `--zin`, or `--zstdin` to let Miller know:
 
 
 mlr --csv --gzin sort -f color myfile.bin # myfile.bin has gzip contents
@@ -94,7 +94,7 @@ If the command has flags, quote them: e.g. `mlr --prepipe 'zcat -cf'`.
 
 In your [.mlrrc file](customization.md), `--prepipe` and `--prepipex` are not
 allowed as they could be used for unexpected code execution. You can use
-`--prepipe-bz2`, `--prepipe-gunzip`, and `--prepipe-zcat` in `.mlrrc`, though.
+`--prepipe-bz2`, `--prepipe-gunzip`, `--prepipe-zcat`, and `--prepipe-zstdcat` in `.mlrrc`, though.
 
 Note that this feature is quite general and is not limited to decompression
 utilities. You can use it to apply per-file filters of your choice: e.g. `mlr
@@ -107,7 +107,7 @@ There is a `--prepipe` and a `--prepipex`:
 
 Lastly, note that if `--prepipe` or `--prepipex` is specified on the Miller
 command line, it replaces any autodetect decisions that might have been made
-based on the filename extension. Likewise, `--gzin`/`--bz2in`/`--zin` are ignored if
+based on the filename extension. Likewise, `--gzin`/`--bz2in`/`--zin`/`--zstdin` are ignored if
 `--prepipe` or `--prepipex` is also specified.
 
 ## Compressed output
diff --git a/docs/src/reference-main-compressed-data.md.in b/docs/src/reference-main-compressed-data.md.in
index b13e5e7327..cbca6a3c34 100644
--- a/docs/src/reference-main-compressed-data.md.in
+++ b/docs/src/reference-main-compressed-data.md.in
@@ -1,12 +1,12 @@
 # Compressed data
 
-As of [Miller 6](new-in-miller-6.md), Miller supports reading GZIP, BZIP2, and
-ZLIB formats transparently, and in-process. And (as before Miller 6) you have a
+As of [Miller 6](new-in-miller-6.md), Miller supports reading GZIP, BZIP2, ZLIB, and
+ZSTD formats transparently, and in-process. And (as before Miller 6) you have a
 more general `--prepipe` option to support other decompression programs.
 
 ## Automatic detection on input
 
-If your files end in `.gz`, `.bz2`, or `.z` then Miller will autodetect by file extension:
+If your files end in `.gz`, `.bz2`, `.z`, or `.zst` then Miller will autodetect by file extension:
 
 GENMD-CARDIFY-HIGHLIGHT-ONE
 file gz-example.csv.gz
@@ -21,7 +21,7 @@ This will decompress the input data on the fly, while leaving the disk file unmo
 
 ## Manual detection on input
 
-If the filename doesn't in in `.gz`, `.bz2`, or `.z` then you can use the flags `--gzin`, `--bz2in`, or `--zin` to let Miller know:
+If the filename doesn't end in `.gz`, `.bz2`, `.z`, or `.zst` then you can use the flags `--gzin`, `--bz2in`, `--zin`, or `--zstdin` to let Miller know:
 
 GENMD-CARDIFY-HIGHLIGHT-ONE
 mlr --csv --gzin sort -f color myfile.bin # myfile.bin has gzip contents
@@ -50,7 +50,7 @@ If the command has flags, quote them: e.g. `mlr --prepipe 'zcat -cf'`.
 
 In your [.mlrrc file](customization.md), `--prepipe` and `--prepipex` are not
 allowed as they could be used for unexpected code execution. You can use
-`--prepipe-bz2`, `--prepipe-gunzip`, and `--prepipe-zcat` in `.mlrrc`, though.
+`--prepipe-bz2`, `--prepipe-gunzip`, `--prepipe-zcat`, and `--prepipe-zstdcat` in `.mlrrc`, though.
 
 Note that this feature is quite general and is not limited to decompression
 utilities. You can use it to apply per-file filters of your choice: e.g. `mlr
@@ -63,7 +63,7 @@ There is a `--prepipe` and a `--prepipex`:
 
 Lastly, note that if `--prepipe` or `--prepipex` is specified on the Miller
 command line, it replaces any autodetect decisions that might have been made
-based on the filename extension. Likewise, `--gzin`/`--bz2in`/`--zin` are ignored if
+based on the filename extension. Likewise, `--gzin`/`--bz2in`/`--zin`/`--zstdin` are ignored if
 `--prepipe` or `--prepipex` is also specified.
 
 ## Compressed output
diff --git a/docs/src/reference-main-flag-list.md b/docs/src/reference-main-flag-list.md
index 24e2cbc388..8e2daf9d02 100644
--- a/docs/src/reference-main-flag-list.md
+++ b/docs/src/reference-main-flag-list.md
@@ -72,7 +72,7 @@ Notes:
 Miller offers a few different ways to handle reading data files
 	which have been compressed.
 
-* Decompression done within the Miller process itself: `--bz2in` `--gzin` `--zin`
+* Decompression done within the Miller process itself: `--bz2in` `--gzin` `--zin` `--zstdin`
 * Decompression done outside the Miller process: `--prepipe` `--prepipex`
 
 Using `--prepipe` and `--prepipex` you can specify an action to be
@@ -95,7 +95,7 @@ compression (or other) utilities, simply pipe the output:
 
 Lastly, note that if `--prepipe` or `--prepipex` is specified, it replaces any
 decisions that might have been made based on the file suffix. Likewise,
-`--gzin`/`--bz2in`/`--zin` are ignored if `--prepipe` is also specified.
+`--gzin`/`--bz2in`/`--zin`/`--zstdin` are ignored if `--prepipe` is also specified.
 
 
 **Flags:**
@@ -106,8 +106,10 @@ decisions that might have been made based on the file suffix. Likewise,
 * `--prepipe-bz2`: Same as  `--prepipe bz2`, except this is allowed in `.mlrrc`.
 * `--prepipe-gunzip`: Same as  `--prepipe gunzip`, except this is allowed in `.mlrrc`.
 * `--prepipe-zcat`: Same as  `--prepipe zcat`, except this is allowed in `.mlrrc`.
+* `--prepipe-zstdcat`: Same as  `--prepipe zstdcat`, except this is allowed in `.mlrrc`.
 * `--prepipex {decompression command}`: Like `--prepipe` with one exception: doesn't insert `<` between command and filename at runtime. Useful for some commands like `unzip -qc` which don't read standard input.  Allowed at the command line, but not in `.mlrrc` to avoid unexpected code execution.
 * `--zin`: Uncompress zlib within the Miller process. Done by default if file ends in `.z`.
+* `--zstdin`: Uncompress zstd within the Miller process. Done by default if file ends in `.zst`.
 
 ## CSV/TSV-only flags
 
@@ -281,6 +283,7 @@ These are flags which don't fit into any other category.
 * `--ofmtf {n}`: Use --ofmtf 6 as shorthand for --ofmt %.6f, etc.
 * `--ofmtg {n}`: Use --ofmtg 6 as shorthand for --ofmt %.6g, etc.
 * `--records-per-batch {n}`: This is an internal parameter for maximum number of records in a batch size. Normally this does not need to be modified, except when input is from `tail -f`. See also https://miller.readthedocs.io/en/latest/reference-main-flag-list/.
+* `--s-no-comment-strip {file name}`: Take command-line flags from file name, like -s, but with no comment-stripping. For more information please see https://miller.readthedocs.io/en/latest/scripting/.
 * `--seed {n}`: with `n` of the form `12345678` or `0xcafefeed`. For `put`/`filter` `urand`, `urandint`, and `urand32`.
 * `--tz {timezone}`: Specify timezone, overriding `$TZ` environment variable (if any).
 * `-I`: Process files in-place. For each file name on the command line, output is written to a temp file in the same directory, which is then renamed over the original. Each file is processed in isolation: if the output format is CSV, CSV headers will be present in each output file, statistics are only over each file's own records; and so on.
diff --git a/docs/src/reference-verbs.md b/docs/src/reference-verbs.md
index 998900ddf6..6e9fbb4780 100644
--- a/docs/src/reference-verbs.md
+++ b/docs/src/reference-verbs.md
@@ -1447,6 +1447,55 @@ record_count resource
 150          /path/to/second/file
 
+## gsub + +
+mlr gsub -h
+
+
+Usage: mlr gsub [options]
+Replaces old string with new string in specified field(s), with regex support
+for the old string and handling multiple matches, like the `gsub` DSL function.
+See also the `sub` and `ssub` verbs.
+Options:
+-f {a,b,c}  Field names to convert.
+-h|--help   Show this message.
+
+ +
+mlr --icsv --opprint --from example.csv cat --filename then sub -f color,shape l X
+
+
+filename    color  shape    flag  k  index quantity rate
+example.csv yeXlow triangXe true  1  11    43.6498  9.8870
+example.csv red    square   true  2  15    79.2778  0.0130
+example.csv red    circXe   true  3  16    13.8103  2.9010
+example.csv red    square   false 4  48    77.5542  7.4670
+example.csv purpXe triangXe false 5  51    81.2290  8.5910
+example.csv red    square   false 6  64    77.1991  9.5310
+example.csv purpXe triangXe false 7  65    80.1405  5.8240
+example.csv yeXlow circXe   true  8  73    63.9785  4.2370
+example.csv yeXlow circXe   true  9  87    63.5058  8.3350
+example.csv purpXe square   false 10 91    72.3735  8.2430
+
+ +
+mlr --icsv --opprint --from example.csv cat --filename then gsub -f color,shape l X
+
+
+filename    color  shape    flag  k  index quantity rate
+example.csv yeXXow triangXe true  1  11    43.6498  9.8870
+example.csv red    square   true  2  15    79.2778  0.0130
+example.csv red    circXe   true  3  16    13.8103  2.9010
+example.csv red    square   false 4  48    77.5542  7.4670
+example.csv purpXe triangXe false 5  51    81.2290  8.5910
+example.csv red    square   false 6  64    77.1991  9.5310
+example.csv purpXe triangXe false 7  65    80.1405  5.8240
+example.csv yeXXow circXe   true  8  73    63.9785  4.2370
+example.csv yeXXow circXe   true  9  87    63.5058  8.3350
+example.csv purpXe square   false 10 91    72.3735  8.2430
+
+ ## having-fields
@@ -3120,6 +3169,54 @@ then there will be split_yellow_triangle.csv, split_yellow_square.csv, etc.
 See also the "tee" DSL function which lets you do more ad-hoc customization.
 
+## ssub + +
+mlr ssub -h
+
+
+Usage: mlr ssub [options]
+Replaces old string with new string in specified field(s), without regex support for
+the old string, like the `ssub` DSL function. See also the `gsub` and `sub` verbs.
+Options:
+-f {a,b,c}  Field names to convert.
+-h|--help   Show this message.
+
+ +
+mlr --icsv --opprint --from example.csv cat --filename then sub -f filename . o
+
+
+filename    color  shape    flag  k  index quantity rate
+oxample.csv yellow triangle true  1  11    43.6498  9.8870
+oxample.csv red    square   true  2  15    79.2778  0.0130
+oxample.csv red    circle   true  3  16    13.8103  2.9010
+oxample.csv red    square   false 4  48    77.5542  7.4670
+oxample.csv purple triangle false 5  51    81.2290  8.5910
+oxample.csv red    square   false 6  64    77.1991  9.5310
+oxample.csv purple triangle false 7  65    80.1405  5.8240
+oxample.csv yellow circle   true  8  73    63.9785  4.2370
+oxample.csv yellow circle   true  9  87    63.5058  8.3350
+oxample.csv purple square   false 10 91    72.3735  8.2430
+
+ +
+mlr --icsv --opprint --from example.csv cat --filename then ssub -f filename . o
+
+
+filename    color  shape    flag  k  index quantity rate
+exampleocsv yellow triangle true  1  11    43.6498  9.8870
+exampleocsv red    square   true  2  15    79.2778  0.0130
+exampleocsv red    circle   true  3  16    13.8103  2.9010
+exampleocsv red    square   false 4  48    77.5542  7.4670
+exampleocsv purple triangle false 5  51    81.2290  8.5910
+exampleocsv red    square   false 6  64    77.1991  9.5310
+exampleocsv purple triangle false 7  65    80.1405  5.8240
+exampleocsv yellow circle   true  8  73    63.9785  4.2370
+exampleocsv yellow circle   true  9  87    63.5058  8.3350
+exampleocsv purple square   false 10 91    72.3735  8.2430
+
+ ## stats1
@@ -3307,14 +3404,14 @@ fields, optionally categorized by one or more fields.
   data/medium
 
-x_y_cov    0.000042574820827444476
-x_y_corr   0.0005042001844467462
-y_y_cov    0.08461122467974003
+x_y_cov    0.00004257482082749404
+x_y_corr   0.0005042001844473328
+y_y_cov    0.08461122467974005
 y_y_corr   1
-x2_xy_cov  0.04188382281779374
-x2_xy_corr 0.630174342037994
-x2_y2_cov  -0.00030953725962542085
-x2_y2_corr -0.0034249088761121966
+x2_xy_cov  0.041883822817793716
+x2_xy_corr 0.6301743420379936
+x2_y2_cov  -0.0003095372596253918
+x2_y2_corr -0.003424908876111875
 
@@ -3323,12 +3420,12 @@ x2_y2_corr -0.0034249088761121966
   data/medium
 
-a   x_y_ols_m             x_y_ols_b           x_y_ols_n x_y_r2                  y_y_ols_m y_y_ols_b y_y_ols_n y_y_r2 xy_y2_ols_m        xy_y2_ols_b         xy_y2_ols_n xy_y2_r2
-pan 0.01702551273681908   0.5004028922897639  2081      0.00028691820445814767  1         0         2081      1      0.8781320866715662 0.11908230147563566 2081        0.41749827377311266
-eks 0.0407804923685586    0.48140207967651016 1965      0.0016461239223448587   1         0         1965      1      0.8978728611690183 0.10734054433612333 1965        0.45563223864254526
-wye -0.03915349075204814  0.5255096523974456  1966      0.0015051268704373607   1         0         1966      1      0.8538317334220835 0.1267454301662969  1966        0.38991721818599295
-zee 0.0027812364960399147 0.5043070448033061  2047      0.000007751652858786137 1         0         2047      1      0.8524439912011013 0.12401684308018937 2047        0.39356598090006495
-hat -0.018620577041095078 0.5179005397264935  1941      0.0003520036646055585   1         0         1941      1      0.8412305086345014 0.13557328318623216 1941        0.3687944261732265
+a   x_y_ols_m             x_y_ols_b          x_y_ols_n x_y_r2                  y_y_ols_m y_y_ols_b                           y_y_ols_n y_y_r2 xy_y2_ols_m        xy_y2_ols_b         xy_y2_ols_n xy_y2_r2
+pan 0.017025512736819345  0.500402892289764  2081      0.00028691820445815624  1         -0.00000000000000002890430283104539 2081      1      0.8781320866715664 0.11908230147563569 2081        0.4174982737731127
+eks 0.04078049236855813   0.4814020796765104 1965      0.0016461239223448218   1         0.00000000000000017862676354313703  1965      1      0.897872861169018  0.1073405443361234  1965        0.4556322386425451
+wye -0.03915349075204785  0.5255096523974457 1966      0.0015051268704373377   1         0.00000000000000004464425401127647  1966      1      0.8538317334220837 0.1267454301662969  1966        0.3899172181859931
+zee 0.0027812364960401333 0.5043070448033061 2047      0.000007751652858787357 1         0.00000000000000004819404567023685  2047      1      0.8524439912011011 0.12401684308018947 2047        0.39356598090006495
+hat -0.018620577041095272 0.5179005397264937 1941      0.00035200366460556604  1         -0.00000000000000003400445761787692 1941      1      0.8412305086345017 0.13557328318623207 1941        0.3687944261732266
 
Here's an example simple line-fit. The `x` and `y` @@ -3414,11 +3511,11 @@ upsec_count_pca_quality 0.9999590846136102 donesec 92.33051350964094 color purple -upsec_count_pca_m -39.03009744795354 -upsec_count_pca_b 979.9883413064914 +upsec_count_pca_m -39.030097447953594 +upsec_count_pca_b 979.9883413064917 upsec_count_pca_n 21 upsec_count_pca_quality 0.9999908956206317 -donesec 25.10852919630297 +donesec 25.108529196302943
## step @@ -3574,6 +3671,55 @@ $ each 10 uptime | mlr -p step -a delta -f 11 +## sub + +
+mlr sub -h
+
+
+Usage: mlr sub [options]
+Replaces old string with new string in specified field(s), with regex support
+for the old string and not handling multiple matches, like the `sub` DSL function.
+See also the `gsub` and `ssub` verbs.
+Options:
+-f {a,b,c}  Field names to convert.
+-h|--help   Show this message.
+
+ +
+mlr --icsv --opprint --from example.csv cat --filename then sub -f color,shape l X
+
+
+filename    color  shape    flag  k  index quantity rate
+example.csv yeXlow triangXe true  1  11    43.6498  9.8870
+example.csv red    square   true  2  15    79.2778  0.0130
+example.csv red    circXe   true  3  16    13.8103  2.9010
+example.csv red    square   false 4  48    77.5542  7.4670
+example.csv purpXe triangXe false 5  51    81.2290  8.5910
+example.csv red    square   false 6  64    77.1991  9.5310
+example.csv purpXe triangXe false 7  65    80.1405  5.8240
+example.csv yeXlow circXe   true  8  73    63.9785  4.2370
+example.csv yeXlow circXe   true  9  87    63.5058  8.3350
+example.csv purpXe square   false 10 91    72.3735  8.2430
+
+ +
+mlr --icsv --opprint --from example.csv cat --filename then gsub -f color,shape l X
+
+
+filename    color  shape    flag  k  index quantity rate
+example.csv yeXXow triangXe true  1  11    43.6498  9.8870
+example.csv red    square   true  2  15    79.2778  0.0130
+example.csv red    circXe   true  3  16    13.8103  2.9010
+example.csv red    square   false 4  48    77.5542  7.4670
+example.csv purpXe triangXe false 5  51    81.2290  8.5910
+example.csv red    square   false 6  64    77.1991  9.5310
+example.csv purpXe triangXe false 7  65    80.1405  5.8240
+example.csv yeXXow circXe   true  8  73    63.9785  4.2370
+example.csv yeXXow circXe   true  9  87    63.5058  8.3350
+example.csv purpXe square   false 10 91    72.3735  8.2430
+
+ ## summary
@@ -3646,9 +3792,9 @@ distinct_count 5      5      10000              10000                  10000
 mode           pan    wye    1                  0.3467901443380824     0.7268028627434533
 sum            0      0      50005000           4986.019681679581      5062.057444929905
 mean           -      -      5000.5             0.49860196816795804    0.5062057444929905
-stddev         -      -      2886.8956799071675 0.2902925151144007     0.290880086426933
-var            -      -      8334166.666666667  0.08426974433144456    0.08461122467974003
-skewness       -      -      0                  -0.0006899591185521965 -0.017849760120133784
+stddev         -      -      2886.8956799071675 0.29029251511440074    0.2908800864269331
+var            -      -      8334166.666666667  0.08426974433144457    0.08461122467974005
+skewness       -      -      0                  -0.0006899591185517494 -0.01784976012013298
 minlen         3      3      1                  15                     13
 maxlen         3      3      5                  22                     22
 min            eks    eks    1                  0.00004509679127584487 0.00008818962627266114
diff --git a/docs/src/reference-verbs.md.in b/docs/src/reference-verbs.md.in
index 0ff0bd15dd..44feda3deb 100644
--- a/docs/src/reference-verbs.md.in
+++ b/docs/src/reference-verbs.md.in
@@ -487,6 +487,20 @@ GENMD-RUN-COMMAND
 mlr --opprint group-like data/het.dkvp
 GENMD-EOF
 
+## gsub
+
+GENMD-RUN-COMMAND
+mlr gsub -h
+GENMD-EOF
+
+GENMD-RUN-COMMAND
+mlr --icsv --opprint --from example.csv cat --filename then sub -f color,shape l X
+GENMD-EOF
+
+GENMD-RUN-COMMAND
+mlr --icsv --opprint --from example.csv cat --filename then gsub -f color,shape l X
+GENMD-EOF
+
 ## having-fields
 
 GENMD-RUN-COMMAND
@@ -987,6 +1001,20 @@ GENMD-RUN-COMMAND
 mlr split --help
 GENMD-EOF
 
+## ssub
+
+GENMD-RUN-COMMAND
+mlr ssub -h
+GENMD-EOF
+
+GENMD-RUN-COMMAND
+mlr --icsv --opprint --from example.csv cat --filename then sub -f filename . o
+GENMD-EOF
+
+GENMD-RUN-COMMAND
+mlr --icsv --opprint --from example.csv cat --filename then ssub -f filename . o
+GENMD-EOF
+
 ## stats1
 
 GENMD-RUN-COMMAND
@@ -1095,6 +1123,20 @@ Example deriving uptime-delta from system uptime:
 
 GENMD-INCLUDE-ESCAPED(data/ping-delta-example.txt)
 
+## sub
+
+GENMD-RUN-COMMAND
+mlr sub -h
+GENMD-EOF
+
+GENMD-RUN-COMMAND
+mlr --icsv --opprint --from example.csv cat --filename then sub -f color,shape l X
+GENMD-EOF
+
+GENMD-RUN-COMMAND
+mlr --icsv --opprint --from example.csv cat --filename then gsub -f color,shape l X
+GENMD-EOF
+
 ## summary
 
 GENMD-RUN-COMMAND
diff --git a/docs/src/scripting.md b/docs/src/scripting.md
index 29cac3fb71..71c6b22a0c 100644
--- a/docs/src/scripting.md
+++ b/docs/src/scripting.md
@@ -137,7 +137,7 @@ Here instead of putting `#!/bin/bash` on the first line, we can put `mlr` direct
 
 #!/usr/bin/env mlr -s
 --c2p
-filter '$quantity != 20'
+filter '$quantity != 20' # Here is a comment
 then count-distinct -f shape
 then fraction -f count
 
@@ -149,6 +149,7 @@ Points: * You leave off the initial `mlr` since that's present on line 1. * You don't need all the backslashing for line-continuations. * You don't need the explicit `--` or `"$@"`. +* All text from `#` to end of line is stripped out. If for any reason you need to suppress this, please use `mlr --s-no-comment-strip` in place of `mlr -s`. Then you can do diff --git a/docs/src/scripting.md.in b/docs/src/scripting.md.in index 0e4afc9ac4..3234c93984 100644 --- a/docs/src/scripting.md.in +++ b/docs/src/scripting.md.in @@ -67,6 +67,7 @@ Points: * You leave off the initial `mlr` since that's present on line 1. * You don't need all the backslashing for line-continuations. * You don't need the explicit `--` or `"$@"`. +* All text from `#` to end of line is stripped out. If for any reason you need to suppress this, please use `mlr --s-no-comment-strip` in place of `mlr -s`. Then you can do diff --git a/docs/src/two-pass-algorithms.md b/docs/src/two-pass-algorithms.md index 146f3a81e1..e475aebf3b 100644 --- a/docs/src/two-pass-algorithms.md +++ b/docs/src/two-pass-algorithms.md @@ -598,8 +598,8 @@ hat pan 0.4643355557376876 x_count 10000 x_sum 4986.019681679581 x_mean 0.49860196816795804 -x_var 0.08426974433144456 -x_stddev 0.2902925151144007 +x_var 0.08426974433144457 +x_stddev 0.29029251511440074
diff --git a/go.mod b/go.mod
index b9e11f7eb3..2373dea148 100644
--- a/go.mod
+++ b/go.mod
@@ -34,6 +34,7 @@ require (
 	github.com/davecgh/go-spew v1.1.1 // indirect
 	github.com/felixge/fgprof v0.9.3 // indirect
 	github.com/google/pprof v0.0.0-20211214055906-6f57359322fd // indirect
+	github.com/klauspost/compress v1.16.7
 	github.com/pkg/errors v0.9.1 // indirect
 	github.com/pmezard/go-difflib v1.0.0 // indirect
 	gopkg.in/yaml.v3 v3.0.1 // indirect
diff --git a/go.sum b/go.sum
index e896c8f4e9..84593de572 100644
--- a/go.sum
+++ b/go.sum
@@ -15,6 +15,8 @@ github.com/johnkerl/lumin v1.0.0 h1:CV34cHZOJ92Y02RbQ0rd4gA0C06Qck9q8blOyaPoWpU=
 github.com/johnkerl/lumin v1.0.0/go.mod h1:eLf5AdQOaLvzZ2zVy4REr/DSeEwG+CZreHwNLICqv9E=
 github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51 h1:Z9n2FFNUXsshfwJMBgNA0RU6/i7WVaAegv3PtuIHPMs=
 github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51/go.mod h1:CzGEWj7cYgsdH8dAjBGEr58BoE7ScuLd+fwFZ44+/x8=
+github.com/klauspost/compress v1.16.7 h1:2mk3MPGNzKyxErAw8YaohYh69+pa4sIQSC0fPGCFR9I=
+github.com/klauspost/compress v1.16.7/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE=
 github.com/lestrrat-go/envload v0.0.0-20180220234015-a3eb8ddeffcc h1:RKf14vYWi2ttpEmkA4aQ3j4u9dStX2t4M8UM6qqNsG8=
 github.com/lestrrat-go/envload v0.0.0-20180220234015-a3eb8ddeffcc/go.mod h1:kopuH9ugFRkIXf3YoqHKyrJ9YfUFsckUU9S7B+XP+is=
 github.com/lestrrat-go/strftime v1.0.6 h1:CFGsDEt1pOpFNU+TJB0nhz9jl+K0hZSLE205AhTIGQQ=
diff --git a/internal/pkg/cli/option_parse.go b/internal/pkg/cli/option_parse.go
index cb01c27410..0ee362f2b3 100644
--- a/internal/pkg/cli/option_parse.go
+++ b/internal/pkg/cli/option_parse.go
@@ -2200,7 +2200,8 @@ func CompressedDataPrintInfo() {
 	fmt.Print(`Miller offers a few different ways to handle reading data files
 	which have been compressed.
 
-* Decompression done within the Miller process itself: ` + "`--bz2in`" + ` ` + "`--gzin`" + ` ` + "`--zin`" + `
+* Decompression done within the Miller process itself: ` + "`--bz2in`" + ` ` + "`--gzin`" + ` ` + "`--zin`" + ` ` + "`--zstdin`" +
+		`
 * Decompression done outside the Miller process: ` + "`--prepipe`" + ` ` + "`--prepipex`" + `
 
 Using ` + "`--prepipe`" + ` and ` + "`--prepipex`" + ` you can specify an action to be
@@ -2223,7 +2224,7 @@ compression (or other) utilities, simply pipe the output:
 
 Lastly, note that if ` + "`--prepipe`" + ` or ` + "`--prepipex`" + ` is specified, it replaces any
 decisions that might have been made based on the file suffix. Likewise,
-` + "`--gzin`" + `/` + "`--bz2in`" + `/` + "`--zin`" + ` are ignored if ` + "`--prepipe`" + ` is also specified.
+` + "`--gzin`" + `/` + "`--bz2in`" + `/` + "`--zin`" + `/` + "`--zstdin`" + ` are ignored if ` + "`--prepipe`" + ` is also specified.
 `)
 }
 
@@ -2278,6 +2279,16 @@ var CompressedDataFlagSection = FlagSection{
 			},
 		},
 
+		{
+			name: "--prepipe-zstdcat",
+			help: "Same as  `--prepipe zstdcat`, except this is allowed in `.mlrrc`.",
+			parser: func(args []string, argc int, pargi *int, options *TOptions) {
+				options.ReaderOptions.Prepipe = "zstdcat"
+				options.ReaderOptions.PrepipeIsRaw = false
+				*pargi += 1
+			},
+		},
+
 		{
 			name: "--prepipe-bz2",
 			help: "Same as  `--prepipe bz2`, except this is allowed in `.mlrrc`.",
@@ -2314,6 +2325,15 @@ var CompressedDataFlagSection = FlagSection{
 				*pargi += 1
 			},
 		},
+
+		{
+			name: "--zstdin",
+			help: "Uncompress zstd within the Miller process. Done by default if file ends in `.zst`.",
+			parser: func(args []string, argc int, pargi *int, options *TOptions) {
+				options.ReaderOptions.FileInputEncoding = lib.FileInputEncodingZstd
+				*pargi += 1
+			},
+		},
 	},
 }
 
@@ -2988,5 +3008,16 @@ has its own overhead.`,
 				*pargi += 2
 			},
 		},
+
+		{
+			name: "--s-no-comment-strip",
+			arg:  "{file name}",
+			help: `Take command-line flags from file name, like -s, but with no comment-stripping. For more information please see ` +
+				lib.DOC_URL + `/en/latest/scripting/.`,
+			parser: func(args []string, argc int, pargi *int, options *TOptions) {
+				// Already handled in main(). Nothing to do here except to accept this as valid syntax.
+				*pargi += 2
+			},
+		},
 	},
 }
diff --git a/internal/pkg/climain/mlrcli_shebang.go b/internal/pkg/climain/mlrcli_shebang.go
index 99811e6a80..e465ed2b76 100644
--- a/internal/pkg/climain/mlrcli_shebang.go
+++ b/internal/pkg/climain/mlrcli_shebang.go
@@ -3,6 +3,7 @@ package climain
 import (
 	"fmt"
 	"io/ioutil"
+	"regexp"
 	"strings"
 
 	"github.com/johnkerl/miller/internal/pkg/lib"
@@ -25,10 +26,16 @@ import (
 // * This is how shebang lines work
 // * There are Miller verbs with -s flags and we don't want to disrupt their behavior.
 func maybeInterpolateDashS(args []string) ([]string, error) {
+	stripComments := true
+
 	if len(args) < 2 {
 		return args, nil
 	}
-	if args[1] != "-s" { // Normal case
+	if args[1] == "-s" {
+		stripComments = true
+	} else if args[1] == "--s-no-comment-strip" {
+		stripComments = false
+	} else { // Normal case
 		return args, nil
 	}
 	if len(args) < 3 {
@@ -59,9 +66,12 @@ func maybeInterpolateDashS(args []string) ([]string, error) {
 		}
 	}
 
-	// TODO: maybe support comment lines deeper within the script-file.
-	// Make sure they're /^[\s]+#/ since we don't want to disrupt a "#" within
-	// strings which are not actually comment characters.
+	if stripComments {
+		re := regexp.MustCompile(`#.*`)
+		for i, _ := range lines {
+			lines[i] = re.ReplaceAllString(lines[i], "")
+		}
+	}
 
 	// Re-join lines to strings, and pass off to a shell-parser to split into
 	// an args[]-style array.
diff --git a/internal/pkg/lib/file_readers.go b/internal/pkg/lib/file_readers.go
index fa42688efd..1511200406 100644
--- a/internal/pkg/lib/file_readers.go
+++ b/internal/pkg/lib/file_readers.go
@@ -25,6 +25,7 @@ import (
 	"compress/gzip"
 	"compress/zlib"
 	"fmt"
+	"github.com/klauspost/compress/zstd"
 	"io"
 	"net/http"
 	"os"
@@ -38,6 +39,7 @@ const (
 	FileInputEncodingBzip2
 	FileInputEncodingGzip
 	FileInputEncodingZlib
+	FileInputEncodingZstd
 )
 
 // OpenFileForRead: If prepipe is non-empty, popens "{prepipe} < {filename}"
@@ -160,6 +162,8 @@ func openEncodedHandleForRead(
 		return gzip.NewReader(handle)
 	case FileInputEncodingZlib:
 		return zlib.NewReader(handle)
+	case FileInputEncodingZstd:
+		return NewZstdReadCloser(handle)
 	}
 
 	InternalCodingErrorIf(encoding != FileInputEncodingDefault)
@@ -173,6 +177,9 @@ func openEncodedHandleForRead(
 	if strings.HasSuffix(filename, ".z") {
 		return zlib.NewReader(handle)
 	}
+	if strings.HasSuffix(filename, ".zst") {
+		return NewZstdReadCloser(handle)
+	}
 
 	// Pass along os.Stdin or os.Open(filename)
 	return handle, nil
@@ -200,6 +207,35 @@ func (rc *BZip2ReadCloser) Close() error {
 	return rc.originalHandle.Close()
 }
 
+// ----------------------------------------------------------------
+// ZstdReadCloser remedies the fact that zstd.NewReader does not implement io.ReadCloser.
+type ZstdReadCloser struct {
+	originalHandle io.ReadCloser
+	zstdHandle     *zstd.Decoder
+}
+
+func NewZstdReadCloser(handle io.ReadCloser) (*ZstdReadCloser, error) {
+	zstdHandle, err := zstd.NewReader(handle)
+	if err != nil {
+		return nil, err
+	}
+	return &ZstdReadCloser{
+		originalHandle: handle,
+		zstdHandle:     zstdHandle,
+	}, nil
+}
+
+func (rc *ZstdReadCloser) Read(p []byte) (n int, err error) {
+	return rc.zstdHandle.Read(p)
+}
+
+// Close releases the zstd decoder's resources (its Close method returns no
+// error) and then closes the underlying file handle.
+func (rc *ZstdReadCloser) Close() error {
+	rc.zstdHandle.Close()
+	return rc.originalHandle.Close()
+}
+
 // ----------------------------------------------------------------
 
 // IsEOF handles the following problem: reading past end of files opened with
diff --git a/internal/pkg/platform/getargs_windows.go b/internal/pkg/platform/getargs_windows.go
index 536a6288e1..4349e43462 100644
--- a/internal/pkg/platform/getargs_windows.go
+++ b/internal/pkg/platform/getargs_windows.go
@@ -11,6 +11,7 @@ package platform
 import (
 	"fmt"
 	"os"
+	"path/filepath"
 	"strings"
 
 	shellquote "github.com/kballard/go-shellquote"
@@ -76,7 +77,20 @@ func GetArgs() []string {
 		}
 	}
 	//printArgs(retargs, "NEW")
-	return retargs
+
+	globbed := make([]string, 0)
+	for i, _ := range retargs {
+		// Expand things like *.csv
+		matches, err := filepath.Glob(retargs[i])
+		if matches != nil && err == nil {
+			globbed = append(globbed, matches...)
+		} else {
+			globbed = append(globbed, retargs[i])
+		}
+	}
+	//printArgs(globbed, "NEW")
+
+	return globbed
 }
 
 // ----------------------------------------------------------------
diff --git a/internal/pkg/transformers/aaa_transformer_table.go b/internal/pkg/transformers/aaa_transformer_table.go
index 60f490e0d8..ece90a8584 100644
--- a/internal/pkg/transformers/aaa_transformer_table.go
+++ b/internal/pkg/transformers/aaa_transformer_table.go
@@ -33,6 +33,7 @@ var TRANSFORMER_LOOKUP_TABLE = []TransformerSetup{
 	GrepSetup,
 	GroupBySetup,
 	GroupLikeSetup,
+	GsubSetup,
 	HavingFieldsSetup,
 	HeadSetup,
 	HistogramSetup,
@@ -62,9 +63,11 @@ var TRANSFORMER_LOOKUP_TABLE = []TransformerSetup{
 	SortSetup,
 	SortWithinRecordsSetup,
 	SplitSetup,
+	SsubSetup,
 	Stats1Setup,
 	Stats2Setup,
 	StepSetup,
+	SubSetup,
 	SummarySetup,
 	TacSetup,
 	TailSetup,
diff --git a/internal/pkg/transformers/gsub.go b/internal/pkg/transformers/gsub.go
new file mode 100644
index 0000000000..550aeda5af
--- /dev/null
+++ b/internal/pkg/transformers/gsub.go
@@ -0,0 +1,157 @@
+package transformers
+
+import (
+	"container/list"
+	"fmt"
+	"os"
+	"strings"
+
+	"github.com/johnkerl/miller/internal/pkg/bifs"
+	"github.com/johnkerl/miller/internal/pkg/cli"
+	"github.com/johnkerl/miller/internal/pkg/mlrval"
+	"github.com/johnkerl/miller/internal/pkg/types"
+)
+
+// ----------------------------------------------------------------
+const verbNameGsub = "gsub"
+
+var GsubSetup = TransformerSetup{
+	Verb:         verbNameGsub,
+	UsageFunc:    transformerGsubUsage,
+	ParseCLIFunc: transformerGsubParseCLI,
+	IgnoresInput: false,
+}
+
+func transformerGsubUsage(
+	o *os.File,
+) {
+	fmt.Fprintf(o, "Usage: %s %s [options]\n", "mlr", verbNameGsub)
+	fmt.Fprintf(o, "Replaces old string with new string in specified field(s), with regex support\n")
+	fmt.Fprintf(o, "for the old string and handling multiple matches, like the `gsub` DSL function.\n")
+	fmt.Fprintf(o, "See also the `sub` and `ssub` verbs.\n")
+	fmt.Fprintf(o, "Options:\n")
+	fmt.Fprintf(o, "-f {a,b,c}  Field names to convert.\n")
+	fmt.Fprintf(o, "-h|--help   Show this message.\n")
+}
+
+func transformerGsubParseCLI(
+	pargi *int,
+	argc int,
+	args []string,
+	_ *cli.TOptions,
+	doConstruct bool, // false for first pass of CLI-parse, true for second pass
+) IRecordTransformer {
+
+	// Skip the verb name from the current spot in the mlr command line
+	argi := *pargi
+	verb := args[argi]
+	argi++
+
+	// Parse local flags
+	var fieldNames []string = nil
+	var oldText string
+	var newText string
+
+	for argi < argc /* variable increment: 1 or 2 depending on flag */ {
+		opt := args[argi]
+		if !strings.HasPrefix(opt, "-") {
+			break // No more flag options to process
+		}
+		if args[argi] == "--" {
+			break // All transformers must do this so main-flags can follow verb-flags
+		}
+		argi++
+
+		if opt == "-h" || opt == "--help" {
+			transformerGsubUsage(os.Stdout)
+			os.Exit(0)
+
+		} else if opt == "-f" {
+			fieldNames = cli.VerbGetStringArrayArgOrDie(verb, opt, args, &argi, argc)
+		} else {
+			transformerGsubUsage(os.Stderr)
+			os.Exit(1)
+		}
+	}
+
+	if fieldNames == nil {
+		transformerGsubUsage(os.Stderr)
+		os.Exit(1)
+	}
+
+	// Get the old and new text from the command line
+	if (argc - argi) < 2 {
+		transformerGsubUsage(os.Stderr)
+		os.Exit(1)
+	}
+	oldText = args[argi]
+	newText = args[argi+1]
+
+	argi += 2
+
+	*pargi = argi
+	if !doConstruct { // All transformers must do this for main command-line parsing
+		return nil
+	}
+
+	transformer, err := NewTransformerGsub(
+		fieldNames,
+		oldText,
+		newText,
+	)
+	if err != nil {
+		fmt.Fprintln(os.Stderr, err)
+		os.Exit(1)
+	}
+
+	return transformer
+}
+
+// ----------------------------------------------------------------
+type TransformerGsub struct {
+	fieldNames []string
+	oldText    *mlrval.Mlrval
+	newText    *mlrval.Mlrval
+}
+
+// ----------------------------------------------------------------
+func NewTransformerGsub(
+	fieldNames []string,
+	oldText string,
+	newText string,
+) (*TransformerGsub, error) {
+	tr := &TransformerGsub{
+		fieldNames: fieldNames,
+		oldText:    mlrval.FromString(oldText),
+		newText:    mlrval.FromString(newText),
+	}
+	return tr, nil
+}
+
+func (tr *TransformerGsub) Transform(
+	inrecAndContext *types.RecordAndContext,
+	outputRecordsAndContexts *list.List, // list of *types.RecordAndContext
+	inputDownstreamDoneChannel <-chan bool,
+	outputDownstreamDoneChannel chan<- bool,
+) {
+	HandleDefaultDownstreamDone(inputDownstreamDoneChannel, outputDownstreamDoneChannel)
+
+	if !inrecAndContext.EndOfStream {
+		inrec := inrecAndContext.Record
+
+		for _, fieldName := range tr.fieldNames {
+			oldValue := inrec.Get(fieldName)
+			if oldValue == nil {
+				continue
+			}
+
+			newValue := bifs.BIF_gsub(oldValue, tr.oldText, tr.newText)
+
+			inrec.PutReference(fieldName, newValue)
+		}
+
+		outputRecordsAndContexts.PushBack(inrecAndContext)
+	} else {
+		outputRecordsAndContexts.PushBack(inrecAndContext) // emit end-of-stream marker
+	}
+}
diff --git a/internal/pkg/transformers/ssub.go b/internal/pkg/transformers/ssub.go
new file mode 100644
index 0000000000..bd8e542473
--- /dev/null
+++ b/internal/pkg/transformers/ssub.go
@@ -0,0 +1,156 @@
+package transformers
+
+import (
+	"container/list"
+	"fmt"
+	"os"
+	"strings"
+
+	"github.com/johnkerl/miller/internal/pkg/bifs"
+	"github.com/johnkerl/miller/internal/pkg/cli"
+	"github.com/johnkerl/miller/internal/pkg/mlrval"
+	"github.com/johnkerl/miller/internal/pkg/types"
+)
+
+// ----------------------------------------------------------------
+const verbNameSsub = "ssub"
+
+var SsubSetup = TransformerSetup{
+	Verb:         verbNameSsub,
+	UsageFunc:    transformerSsubUsage,
+	ParseCLIFunc: transformerSsubParseCLI,
+	IgnoresInput: false,
+}
+
+func transformerSsubUsage(
+	o *os.File,
+) {
+	fmt.Fprintf(o, "Usage: %s %s [options]\n", "mlr", verbNameSsub)
+	fmt.Fprintf(o, "Replaces old string with new string in specified field(s), without regex support for\n")
+	fmt.Fprintf(o, "the old string, like the `ssub` DSL function. See also the `gsub` and `sub` verbs.\n")
+	fmt.Fprintf(o, "Options:\n")
+	fmt.Fprintf(o, "-f {a,b,c}  Field names to convert.\n")
+	fmt.Fprintf(o, "-h|--help   Show this message.\n")
+}
+
+func transformerSsubParseCLI(
+	pargi *int,
+	argc int,
+	args []string,
+	_ *cli.TOptions,
+	doConstruct bool, // false for first pass of CLI-parse, true for second pass
+) IRecordTransformer {
+
+	// Skip the verb name from the current spot in the mlr command line
+	argi := *pargi
+	verb := args[argi]
+	argi++
+
+	// Parse local flags
+	var fieldNames []string = nil
+	var oldText string
+	var newText string
+
+	for argi < argc /* variable increment: 1 or 2 depending on flag */ {
+		opt := args[argi]
+		if !strings.HasPrefix(opt, "-") {
+			break // No more flag options to process
+		}
+		if args[argi] == "--" {
+			break // All transformers must do this so main-flags can follow verb-flags
+		}
+		argi++
+
+		if opt == "-h" || opt == "--help" {
+			transformerSsubUsage(os.Stdout)
+			os.Exit(0)
+
+		} else if opt == "-f" {
+			fieldNames = cli.VerbGetStringArrayArgOrDie(verb, opt, args, &argi, argc)
+		} else {
+			transformerSsubUsage(os.Stderr)
+			os.Exit(1)
+		}
+	}
+
+	if fieldNames == nil {
+		transformerSsubUsage(os.Stderr)
+		os.Exit(1)
+	}
+
+	// Get the old and new text from the command line
+	if (argc - argi) < 2 {
+		transformerSsubUsage(os.Stderr)
+		os.Exit(1)
+	}
+	oldText = args[argi]
+	newText = args[argi+1]
+
+	argi += 2
+
+	*pargi = argi
+	if !doConstruct { // All transformers must do this for main command-line parsing
+		return nil
+	}
+
+	transformer, err := NewTransformerSsub(
+		fieldNames,
+		oldText,
+		newText,
+	)
+	if err != nil {
+		fmt.Fprintln(os.Stderr, err)
+		os.Exit(1)
+	}
+
+	return transformer
+}
+
+// ----------------------------------------------------------------
+type TransformerSsub struct {
+	fieldNames []string
+	oldText    *mlrval.Mlrval
+	newText    *mlrval.Mlrval
+}
+
+// ----------------------------------------------------------------
+func NewTransformerSsub(
+	fieldNames []string,
+	oldText string,
+	newText string,
+) (*TransformerSsub, error) {
+	tr := &TransformerSsub{
+		fieldNames: fieldNames,
+		oldText:    mlrval.FromString(oldText),
+		newText:    mlrval.FromString(newText),
+	}
+	return tr, nil
+}
+
+func (tr *TransformerSsub) Transform(
+	inrecAndContext *types.RecordAndContext,
+	outputRecordsAndContexts *list.List, // list of *types.RecordAndContext
+	inputDownstreamDoneChannel <-chan bool,
+	outputDownstreamDoneChannel chan<- bool,
+) {
+	HandleDefaultDownstreamDone(inputDownstreamDoneChannel, outputDownstreamDoneChannel)
+
+	if !inrecAndContext.EndOfStream {
+		inrec := inrecAndContext.Record
+
+		for _, fieldName := range tr.fieldNames {
+			oldValue := inrec.Get(fieldName)
+			if oldValue == nil {
+				continue
+			}
+
+			newValue := bifs.BIF_ssub(oldValue, tr.oldText, tr.newText)
+
+			inrec.PutReference(fieldName, newValue)
+		}
+
+		outputRecordsAndContexts.PushBack(inrecAndContext)
+	} else {
+		outputRecordsAndContexts.PushBack(inrecAndContext) // emit end-of-stream marker
+	}
+}
diff --git a/internal/pkg/transformers/sub.go b/internal/pkg/transformers/sub.go
new file mode 100644
index 0000000000..eee7783624
--- /dev/null
+++ b/internal/pkg/transformers/sub.go
@@ -0,0 +1,157 @@
+package transformers
+
+import (
+	"container/list"
+	"fmt"
+	"os"
+	"strings"
+
+	"github.com/johnkerl/miller/internal/pkg/bifs"
+	"github.com/johnkerl/miller/internal/pkg/cli"
+	"github.com/johnkerl/miller/internal/pkg/mlrval"
+	"github.com/johnkerl/miller/internal/pkg/types"
+)
+
+// ----------------------------------------------------------------
+const verbNameSub = "sub"
+
+var SubSetup = TransformerSetup{
+	Verb:         verbNameSub,
+	UsageFunc:    transformerSubUsage,
+	ParseCLIFunc: transformerSubParseCLI,
+	IgnoresInput: false,
+}
+
+func transformerSubUsage(
+	o *os.File,
+) {
+	fmt.Fprintf(o, "Usage: %s %s [options]\n", "mlr", verbNameSub)
+	fmt.Fprintf(o, "Replaces old string with new string in specified field(s), with regex support\n")
+	fmt.Fprintf(o, "for the old string and not handling multiple matches, like the `sub` DSL function.\n")
+	fmt.Fprintf(o, "See also the `gsub` and `ssub` verbs.\n")
+	fmt.Fprintf(o, "Options:\n")
+	fmt.Fprintf(o, "-f {a,b,c}  Field names to convert.\n")
+	fmt.Fprintf(o, "-h|--help   Show this message.\n")
+}
+
+func transformerSubParseCLI(
+	pargi *int,
+	argc int,
+	args []string,
+	_ *cli.TOptions,
+	doConstruct bool, // false for first pass of CLI-parse, true for second pass
+) IRecordTransformer {
+
+	// Skip the verb name from the current spot in the mlr command line
+	argi := *pargi
+	verb := args[argi]
+	argi++
+
+	// Parse local flags
+	var fieldNames []string = nil
+	var oldText string
+	var newText string
+
+	for argi < argc /* variable increment: 1 or 2 depending on flag */ {
+		opt := args[argi]
+		if !strings.HasPrefix(opt, "-") {
+			break // No more flag options to process
+		}
+		if args[argi] == "--" {
+			break // All transformers must do this so main-flags can follow verb-flags
+		}
+		argi++
+
+		if opt == "-h" || opt == "--help" {
+			transformerSubUsage(os.Stdout)
+			os.Exit(0)
+
+		} else if opt == "-f" {
+			fieldNames = cli.VerbGetStringArrayArgOrDie(verb, opt, args, &argi, argc)
+		} else {
+			transformerSubUsage(os.Stderr)
+			os.Exit(1)
+		}
+	}
+
+	if fieldNames == nil {
+		transformerSubUsage(os.Stderr)
+		os.Exit(1)
+	}
+
+	// Get the old and new text from the command line
+	if (argc - argi) < 2 {
+		transformerSubUsage(os.Stderr)
+		os.Exit(1)
+	}
+	oldText = args[argi]
+	newText = args[argi+1]
+
+	argi += 2
+
+	*pargi = argi
+	if !doConstruct { // All transformers must do this for main command-line parsing
+		return nil
+	}
+
+	transformer, err := NewTransformerSub(
+		fieldNames,
+		oldText,
+		newText,
+	)
+	if err != nil {
+		fmt.Fprintln(os.Stderr, err)
+		os.Exit(1)
+	}
+
+	return transformer
+}
+
+// ----------------------------------------------------------------
+type TransformerSub struct {
+	fieldNames []string
+	oldText    *mlrval.Mlrval
+	newText    *mlrval.Mlrval
+}
+
+// ----------------------------------------------------------------
+func NewTransformerSub(
+	fieldNames []string,
+	oldText string,
+	newText string,
+) (*TransformerSub, error) {
+	tr := &TransformerSub{
+		fieldNames: fieldNames,
+		oldText:    mlrval.FromString(oldText),
+		newText:    mlrval.FromString(newText),
+	}
+	return tr, nil
+}
+
+func (tr *TransformerSub) Transform(
+	inrecAndContext *types.RecordAndContext,
+	outputRecordsAndContexts *list.List, // list of *types.RecordAndContext
+	inputDownstreamDoneChannel <-chan bool,
+	outputDownstreamDoneChannel chan<- bool,
+) {
+	HandleDefaultDownstreamDone(inputDownstreamDoneChannel, outputDownstreamDoneChannel)
+
+	if !inrecAndContext.EndOfStream {
+		inrec := inrecAndContext.Record
+
+		for _, fieldName := range tr.fieldNames {
+			oldValue := inrec.Get(fieldName)
+			if oldValue == nil {
+				continue
+			}
+
+			newValue := bifs.BIF_sub(oldValue, tr.oldText, tr.newText)
+
+			inrec.PutReference(fieldName, newValue)
+		}
+
+		outputRecordsAndContexts.PushBack(inrecAndContext)
+	} else {
+		outputRecordsAndContexts.PushBack(inrecAndContext) // emit end-of-stream marker
+	}
+}
diff --git a/man/manpage.txt b/man/manpage.txt
index 58ff3991fd..0c04fc330e 100644
--- a/man/manpage.txt
+++ b/man/manpage.txt
@@ -173,12 +173,13 @@ MILLER(1)                                                            MILLER(1)
 1mVERB LIST0m
        altkv bar bootstrap case cat check clean-whitespace count-distinct count
        count-similar cut decimate fill-down fill-empty filter flatten format-values
-       fraction gap grep group-by group-like having-fields head histogram json-parse
-       json-stringify join label latin1-to-utf8 least-frequent merge-fields
-       most-frequent nest nothing put regularize remove-empty-columns rename reorder
-       repeat reshape sample sec2gmtdate sec2gmt seqgen shuffle skip-trivial-records
-       sort sort-within-records split stats1 stats2 step summary tac tail tee
-       template top utf8-to-latin1 unflatten uniq unspace unsparsify
+       fraction gap grep group-by group-like gsub having-fields head histogram
+       json-parse json-stringify join label latin1-to-utf8 least-frequent
+       merge-fields most-frequent nest nothing put regularize remove-empty-columns
+       rename reorder repeat reshape sample sec2gmtdate sec2gmt seqgen shuffle
+       skip-trivial-records sort sort-within-records split ssub stats1 stats2 step
+       sub summary tac tail tee template top utf8-to-latin1 unflatten uniq unspace
+       unsparsify
 
 1mFUNCTION LIST0m
        abs acos acosh any append apply arrayify asin asinh asserting_absent
@@ -241,7 +242,7 @@ MILLER(1)                                                            MILLER(1)
        Miller offers a few different ways to handle reading data files
             which have been compressed.
 
-       * Decompression done within the Miller process itself: `--bz2in` `--gzin` `--zin`
+       * Decompression done within the Miller process itself: `--bz2in` `--gzin` `--zin` `--zstdin`
        * Decompression done outside the Miller process: `--prepipe` `--prepipex`
 
        Using `--prepipe` and `--prepipex` you can specify an action to be
@@ -264,7 +265,7 @@ MILLER(1)                                                            MILLER(1)
 
        Lastly, note that if `--prepipe` or `--prepipex` is specified, it replaces any
        decisions that might have been made based on the file suffix. Likewise,
-       `--gzin`/`--bz2in`/`--zin` are ignored if `--prepipe` is also specified.
+       `--gzin`/`--bz2in`/`--zin`/`--zstdin` are ignored if `--prepipe` is also specified.
 
        --bz2in                  Uncompress bzip2 within the Miller process. Done by
                                 default if file ends in `.bz2`.
@@ -281,6 +282,8 @@ MILLER(1)                                                            MILLER(1)
                                 `.mlrrc`.
        --prepipe-zcat           Same as `--prepipe zcat`, except this is allowed in
                                 `.mlrrc`.
+       --prepipe-zstdcat        Same as `--prepipe zstdcat`, except this is allowed
+                                in `.mlrrc`.
        --prepipex {decompression command}
                                 Like `--prepipe` with one exception: doesn't insert
                                 `<` between command and filename at runtime. Useful
@@ -289,6 +292,8 @@ MILLER(1)                                                            MILLER(1)
                                 in `.mlrrc` to avoid unexpected code execution.
        --zin                    Uncompress zlib within the Miller process. Done by
                                 default if file ends in `.z`.
+       --zstdin                 Uncompress zstd within the Miller process. Done by
+                                default if file ends in `.zstd`.
 
 1mCSV/TSV-ONLY FLAGS0m
        These are flags which are applicable to CSV format.
@@ -551,6 +556,11 @@ MILLER(1)                                                            MILLER(1)
                                 to be modified, except when input is from `tail -f`.
                                 See also
                                 https://miller.readthedocs.io/en/latest/reference-main-flag-list/.
+       --s-no-comment-strip {file name}
+                                Take command-line flags from file name, like -s, but
+                                with no comment-stripping. For more information
+                                please see
+                                https://miller.readthedocs.io/en/latest/scripting/.
        --seed {n}               with `n` of the form `12345678` or `0xcafefeed`. For
                                 `put`/`filter` `urand`, `urandint`, and `urand32`.
        --tz {timezone}          Specify timezone, overriding `$TZ` environment
@@ -1215,6 +1225,15 @@ MILLER(1)                                                            MILLER(1)
        Options:
        -h|--help Show this message.
 
+   1mgsub0m
+       Usage: mlr gsub [options]
+       Replaces old string with new string in specified field(s), with regex support
+       for the old string and handling multiple matches, like the `gsub` DSL function.
+       See also the `sub` and `ssub` verbs.
+       Options:
+       -f {a,b,c}  Field names to convert.
+       -h|--help   Show this message.
+
    1mhaving-fields0m
        Usage: mlr having-fields [options]
        Conditionally passes through records depending on each record's field names.
@@ -1823,6 +1842,14 @@ MILLER(1)                                                            MILLER(1)
 
        See also the "tee" DSL function which lets you do more ad-hoc customization.
 
+   1mssub0m
+       Usage: mlr ssub [options]
+       Replaces old string with new string in specified field(s), without regex support for
+       the old string, like the `ssub` DSL function. See also the `gsub` and `sub` verbs.
+       Options:
+       -f {a,b,c}  Field names to convert.
+       -h|--help   Show this message.
+
    1mstats10m
        Usage: mlr stats1 [options]
        Computes univariate statistics for one or more given fields, accumulated across
@@ -1960,6 +1987,15 @@ MILLER(1)                                                            MILLER(1)
        https://en.wikipedia.org/wiki/Moving_average#Exponential_moving_average
        for more information on EWMA.
 
+   1msub0m
+       Usage: mlr sub [options]
+       Replaces old string with new string in specified field(s), with regex support
+       for the old string and not handling multiple matches, like the `sub` DSL function.
+       See also the `gsub` and `ssub` verbs.
+       Options:
+       -f {a,b,c}  Field names to convert.
+       -h|--help   Show this message.
+
    1msummary0m
        Usage: mlr summary [options]
        Show summary statistics about the input data.
diff --git a/man/mlr.1 b/man/mlr.1
index 50d617ebcc..ab56c69bb3 100644
--- a/man/mlr.1
+++ b/man/mlr.1
@@ -214,12 +214,13 @@ for all things with "map" in their names.
 .nf
 altkv bar bootstrap case cat check clean-whitespace count-distinct count
 count-similar cut decimate fill-down fill-empty filter flatten format-values
-fraction gap grep group-by group-like having-fields head histogram json-parse
-json-stringify join label latin1-to-utf8 least-frequent merge-fields
-most-frequent nest nothing put regularize remove-empty-columns rename reorder
-repeat reshape sample sec2gmtdate sec2gmt seqgen shuffle skip-trivial-records
-sort sort-within-records split stats1 stats2 step summary tac tail tee
-template top utf8-to-latin1 unflatten uniq unspace unsparsify
+fraction gap grep group-by group-like gsub having-fields head histogram
+json-parse json-stringify join label latin1-to-utf8 least-frequent
+merge-fields most-frequent nest nothing put regularize remove-empty-columns
+rename reorder repeat reshape sample sec2gmtdate sec2gmt seqgen shuffle
+skip-trivial-records sort sort-within-records split ssub stats1 stats2 step
+sub summary tac tail tee template top utf8-to-latin1 unflatten uniq unspace
+unsparsify
 .fi
 .if n \{\
 .RE
@@ -304,7 +305,7 @@ Notes:
 Miller offers a few different ways to handle reading data files
 	which have been compressed.
 
-* Decompression done within the Miller process itself: `--bz2in` `--gzin` `--zin`
+* Decompression done within the Miller process itself: `--bz2in` `--gzin` `--zin` `--zstdin`
 * Decompression done outside the Miller process: `--prepipe` `--prepipex`
 
 Using `--prepipe` and `--prepipex` you can specify an action to be
@@ -327,7 +328,7 @@ compression (or other) utilities, simply pipe the output:
 
 Lastly, note that if `--prepipe` or `--prepipex` is specified, it replaces any
 decisions that might have been made based on the file suffix. Likewise,
-`--gzin`/`--bz2in`/`--zin` are ignored if `--prepipe` is also specified.
+`--gzin`/`--bz2in`/`--zin`/`--zstdin` are ignored if `--prepipe` is also specified.
 
 --bz2in                  Uncompress bzip2 within the Miller process. Done by
                          default if file ends in `.bz2`.
@@ -344,6 +345,8 @@ decisions that might have been made based on the file suffix. Likewise,
                          `.mlrrc`.
 --prepipe-zcat           Same as `--prepipe zcat`, except this is allowed in
                          `.mlrrc`.
+--prepipe-zstdcat        Same as `--prepipe zstdcat`, except this is allowed
+                         in `.mlrrc`.
 --prepipex {decompression command}
                          Like `--prepipe` with one exception: doesn't insert
                          `<` between command and filename at runtime. Useful
@@ -352,6 +355,8 @@ decisions that might have been made based on the file suffix. Likewise,
                          in `.mlrrc` to avoid unexpected code execution.
 --zin                    Uncompress zlib within the Miller process. Done by
                          default if file ends in `.z`.
+--zstdin                 Uncompress zstd within the Miller process. Done by
+                         default if file ends in `.zstd`.
 .fi
 .if n \{\
 .RE
@@ -670,6 +675,11 @@ These are flags which don't fit into any other category.
                          to be modified, except when input is from `tail -f`.
                          See also
                          https://miller.readthedocs.io/en/latest/reference-main-flag-list/.
+--s-no-comment-strip {file name}
+                         Take command-line flags from file name, like -s, but
+                         with no comment-stripping. For more information
+                         please see
+                         https://miller.readthedocs.io/en/latest/scripting/.
 --seed {n}               with `n` of the form `12345678` or `0xcafefeed`. For
                          `put`/`filter` `urand`, `urandint`, and `urand32`.
 --tz {timezone}          Specify timezone, overriding `$TZ` environment
@@ -1520,6 +1530,21 @@ Options:
 .fi
 .if n \{\
 .RE
+.SS "gsub"
+.if n \{\
+.RS 0
+.\}
+.nf
+Usage: mlr gsub [options]
+Replaces old string with new string in specified field(s), with regex support
+for the old string and handling multiple matches, like the `gsub` DSL function.
+See also the `sub` and `ssub` verbs.
+Options:
+-f {a,b,c}  Field names to convert.
+-h|--help   Show this message.
+.fi
+.if n \{\
+.RE
 .SS "having-fields"
 .if n \{\
 .RS 0
@@ -2302,6 +2327,20 @@ See also the "tee" DSL function which lets you do more ad-hoc customization.
 .fi
 .if n \{\
 .RE
+.SS "ssub"
+.if n \{\
+.RS 0
+.\}
+.nf
+Usage: mlr ssub [options]
+Replaces old string with new string in specified field(s), without regex support for
+the old string, like the `ssub` DSL function. See also the `gsub` and `sub` verbs.
+Options:
+-f {a,b,c}  Field names to convert.
+-h|--help   Show this message.
+.fi
+.if n \{\
+.RE
 .SS "stats1"
 .if n \{\
 .RS 0
@@ -2457,6 +2496,21 @@ for more information on EWMA.
 .fi
 .if n \{\
 .RE
+.SS "sub"
+.if n \{\
+.RS 0
+.\}
+.nf
+Usage: mlr sub [options]
+Replaces old string with new string in specified field(s), with regex support
+for the old string and not handling multiple matches, like the `sub` DSL function.
+See also the `gsub` and `ssub` verbs.
+Options:
+-f {a,b,c}  Field names to convert.
+-h|--help   Show this message.
+.fi
+.if n \{\
+.RE
 .SS "summary"
 .if n \{\
 .RS 0
diff --git a/test/cases/cli-help/0001/expout b/test/cases/cli-help/0001/expout
index d6f70fe41a..55efea8ac7 100644
--- a/test/cases/cli-help/0001/expout
+++ b/test/cases/cli-help/0001/expout
@@ -379,6 +379,16 @@ Outputs records in batches having identical field names.
 Options:
 -h|--help Show this message.
 
+================================================================
+gsub
+Usage: mlr gsub [options]
+Replaces old string with new string in specified field(s), with regex support
+for the old string and handling multiple matches, like the `gsub` DSL function.
+See also the `sub` and `ssub` verbs.
+Options:
+-f {a,b,c}  Field names to convert.
+-h|--help   Show this message.
+
 ================================================================
 having-fields
 Usage: mlr having-fields [options]
@@ -1016,6 +1026,15 @@ then there will be split_yellow_triangle.csv, split_yellow_square.csv, etc.
 
 See also the "tee" DSL function which lets you do more ad-hoc customization.
 
+================================================================
+ssub
+Usage: mlr ssub [options]
+Replaces old string with new string in specified field(s), without regex support for
+the old string, like the `ssub` DSL function. See also the `gsub` and `sub` verbs.
+Options:
+-f {a,b,c}  Field names to convert.
+-h|--help   Show this message.
+
 ================================================================
 stats1
 Usage: mlr stats1 [options]
@@ -1156,6 +1175,16 @@ Please see https://miller.readthedocs.io/en/latest/reference-verbs.html#filter o
 https://en.wikipedia.org/wiki/Moving_average#Exponential_moving_average
 for more information on EWMA.
 
+================================================================
+sub
+Usage: mlr sub [options]
+Replaces old string with new string in specified field(s), with regex support
+for the old string and not handling multiple matches, like the `sub` DSL function.
+See also the `gsub` and `ssub` verbs.
+Options:
+-f {a,b,c}  Field names to convert.
+-h|--help   Show this message.
+
 ================================================================
 summary
 Usage: mlr summary [options]
diff --git a/test/cases/globbing/0001/a.csv b/test/cases/globbing/0001/a.csv
new file mode 100644
index 0000000000..bfde6bfa0b
--- /dev/null
+++ b/test/cases/globbing/0001/a.csv
@@ -0,0 +1,2 @@
+a,b,c
+1,2,3
diff --git a/test/cases/globbing/0001/b.csv b/test/cases/globbing/0001/b.csv
new file mode 100644
index 0000000000..a9411aa9de
--- /dev/null
+++ b/test/cases/globbing/0001/b.csv
@@ -0,0 +1,2 @@
+a,b,c
+4,5,6
diff --git a/test/cases/globbing/0001/cmd b/test/cases/globbing/0001/cmd
new file mode 100644
index 0000000000..a5eecc5776
--- /dev/null
+++ b/test/cases/globbing/0001/cmd
@@ -0,0 +1 @@
+mlr --c2p cat ${CASEDIR}/*.csv
diff --git a/test/cases/globbing/0001/experr b/test/cases/globbing/0001/experr
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/test/cases/globbing/0001/expout b/test/cases/globbing/0001/expout
new file mode 100644
index 0000000000..d0c04ad137
--- /dev/null
+++ b/test/cases/globbing/0001/expout
@@ -0,0 +1,3 @@
+a b c
+1 2 3
+4 5 6
diff --git a/test/cases/io-compressed-input/0014/cmd b/test/cases/io-compressed-input/0014/cmd
new file mode 100644
index 0000000000..f6141361ef
--- /dev/null
+++ b/test/cases/io-compressed-input/0014/cmd
@@ -0,0 +1 @@
+mlr count -g a test/input/medium.zst
diff --git a/test/cases/io-compressed-input/0014/experr b/test/cases/io-compressed-input/0014/experr
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/test/cases/io-compressed-input/0014/expout b/test/cases/io-compressed-input/0014/expout
new file mode 100644
index 0000000000..7dcf142127
--- /dev/null
+++ b/test/cases/io-compressed-input/0014/expout
@@ -0,0 +1,5 @@
+a=pan,count=8
+a=eks,count=10
+a=wye,count=7
+a=zee,count=8
+a=hat,count=7
diff --git a/test/cases/io-compressed-input/0015/cmd b/test/cases/io-compressed-input/0015/cmd
new file mode 100644
index 0000000000..8a6e18c1e2
--- /dev/null
+++ b/test/cases/io-compressed-input/0015/cmd
@@ -0,0 +1 @@
+mlr --zstdin count -g a < test/input/medium.zst
diff --git a/test/cases/io-compressed-input/0015/experr b/test/cases/io-compressed-input/0015/experr
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/test/cases/io-compressed-input/0015/expout b/test/cases/io-compressed-input/0015/expout
new file mode 100644
index 0000000000..7dcf142127
--- /dev/null
+++ b/test/cases/io-compressed-input/0015/expout
@@ -0,0 +1,5 @@
+a=pan,count=8
+a=eks,count=10
+a=wye,count=7
+a=zee,count=8
+a=hat,count=7
diff --git a/test/cases/io-compressed-input/0016/cmd b/test/cases/io-compressed-input/0016/cmd
new file mode 100644
index 0000000000..7d38bc22ac
--- /dev/null
+++ b/test/cases/io-compressed-input/0016/cmd
@@ -0,0 +1 @@
+mlr --zstdin count -g a test/input/medium.zst
diff --git a/test/cases/io-compressed-input/0016/experr b/test/cases/io-compressed-input/0016/experr
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/test/cases/io-compressed-input/0016/expout b/test/cases/io-compressed-input/0016/expout
new file mode 100644
index 0000000000..7dcf142127
--- /dev/null
+++ b/test/cases/io-compressed-input/0016/expout
@@ -0,0 +1,5 @@
+a=pan,count=8
+a=eks,count=10
+a=wye,count=7
+a=zee,count=8
+a=hat,count=7
diff --git a/test/cases/verb-case/x b/test/cases/verb-case/x
deleted file mode 100644
index a24cc18bdf..0000000000
--- a/test/cases/verb-case/x
+++ /dev/null
@@ -1,13 +0,0 @@
-mkdir 0020; echo mlr --from test/input.cases-csv --c2j case -u > 0020/cmd
-mkdir 0021; echo mlr --from test/input.cases-csv --c2j case -l > 0021/cmd
-mkdir 0022; echo mlr --from test/input.cases-csv --c2j case -s > 0022/cmd
-mkdir 0023; echo mlr --from test/input.cases-csv --c2j case -t > 0023/cmd
-mkdir 0024; echo mlr --from test/input.cases-csv --c2j case -k -u > 0024/cmd
-mkdir 0025; echo mlr --from test/input.cases-csv --c2j case -k -l > 0025/cmd
-mkdir 0026; echo mlr --from test/input.cases-csv --c2j case -k -s > 0026/cmd
-mkdir 0027; echo mlr --from test/input.cases-csv --c2j case -k -t > 0027/cmd
-mkdir 0028; echo mlr --from test/input.cases-csv --c2j case -v -u > 0028/cmd
-mkdir 0029; echo mlr --from test/input.cases-csv --c2j case -v -l > 0029/cmd
-mkdir 0030; echo mlr --from test/input.cases-csv --c2j case -v -s > 0030/cmd
-mkdir 0031; echo mlr --from test/input.cases-csv --c2j case -v -t > 0031/cmd
-mkdir 0032; echo mlr --from test/input.cases-csv --c2j case -u apple,ball then case -l cat,dog > 0032/cmd
diff --git a/test/cases/verb-sub-gsub-ssub/0001/cmd b/test/cases/verb-sub-gsub-ssub/0001/cmd
new file mode 100644
index 0000000000..7d4cec775c
--- /dev/null
+++ b/test/cases/verb-sub-gsub-ssub/0001/cmd
@@ -0,0 +1 @@
+mlr --d2p --from test/input/abixy sub  -f a,b e X
diff --git a/test/cases/verb-sub-gsub-ssub/0001/experr b/test/cases/verb-sub-gsub-ssub/0001/experr
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/test/cases/verb-sub-gsub-ssub/0001/expout b/test/cases/verb-sub-gsub-ssub/0001/expout
new file mode 100644
index 0000000000..917c3f5ed6
--- /dev/null
+++ b/test/cases/verb-sub-gsub-ssub/0001/expout
@@ -0,0 +1,11 @@
+a   b   i  x          y
+pan pan 1  0.34679014 0.72680286
+Xks pan 2  0.75867996 0.52215111
+wyX wyX 3  0.20460331 0.33831853
+Xks wyX 4  0.38139939 0.13418874
+wyX pan 5  0.57328892 0.86362447
+zXe pan 6  0.52712616 0.49322129
+Xks zXe 7  0.61178406 0.18788492
+zXe wyX 8  0.59855401 0.97618139
+hat wyX 9  0.03144188 0.74955076
+pan wyX 10 0.50262601 0.95261836
diff --git a/test/cases/verb-sub-gsub-ssub/0002/cmd b/test/cases/verb-sub-gsub-ssub/0002/cmd
new file mode 100644
index 0000000000..f33200891d
--- /dev/null
+++ b/test/cases/verb-sub-gsub-ssub/0002/cmd
@@ -0,0 +1 @@
+mlr --d2p --from test/input/abixy gsub -f a,b e X
diff --git a/test/cases/verb-sub-gsub-ssub/0002/experr b/test/cases/verb-sub-gsub-ssub/0002/experr
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/test/cases/verb-sub-gsub-ssub/0002/expout b/test/cases/verb-sub-gsub-ssub/0002/expout
new file mode 100644
index 0000000000..49d53727b3
--- /dev/null
+++ b/test/cases/verb-sub-gsub-ssub/0002/expout
@@ -0,0 +1,11 @@
+a   b   i  x          y
+pan pan 1  0.34679014 0.72680286
+Xks pan 2  0.75867996 0.52215111
+wyX wyX 3  0.20460331 0.33831853
+Xks wyX 4  0.38139939 0.13418874
+wyX pan 5  0.57328892 0.86362447
+zXX pan 6  0.52712616 0.49322129
+Xks zXX 7  0.61178406 0.18788492
+zXX wyX 8  0.59855401 0.97618139
+hat wyX 9  0.03144188 0.74955076
+pan wyX 10 0.50262601 0.95261836
diff --git a/test/cases/verb-sub-gsub-ssub/0003/cmd b/test/cases/verb-sub-gsub-ssub/0003/cmd
new file mode 100644
index 0000000000..ff6b15c4ac
--- /dev/null
+++ b/test/cases/verb-sub-gsub-ssub/0003/cmd
@@ -0,0 +1 @@
+mlr --d2p --from test/input/abixy sub  -f a,b . X
diff --git a/test/cases/verb-sub-gsub-ssub/0003/experr b/test/cases/verb-sub-gsub-ssub/0003/experr
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/test/cases/verb-sub-gsub-ssub/0003/expout b/test/cases/verb-sub-gsub-ssub/0003/expout
new file mode 100644
index 0000000000..a8b8e86432
--- /dev/null
+++ b/test/cases/verb-sub-gsub-ssub/0003/expout
@@ -0,0 +1,11 @@
+a   b   i  x          y
+Xan Xan 1  0.34679014 0.72680286
+Xks Xan 2  0.75867996 0.52215111
+Xye Xye 3  0.20460331 0.33831853
+Xks Xye 4  0.38139939 0.13418874
+Xye Xan 5  0.57328892 0.86362447
+Xee Xan 6  0.52712616 0.49322129
+Xks Xee 7  0.61178406 0.18788492
+Xee Xye 8  0.59855401 0.97618139
+Xat Xye 9  0.03144188 0.74955076
+Xan Xye 10 0.50262601 0.95261836
diff --git a/test/cases/verb-sub-gsub-ssub/0004/cmd b/test/cases/verb-sub-gsub-ssub/0004/cmd
new file mode 100644
index 0000000000..8770d578d5
--- /dev/null
+++ b/test/cases/verb-sub-gsub-ssub/0004/cmd
@@ -0,0 +1 @@
+mlr --d2p --from test/input/abixy ssub -f a,b e X
diff --git a/test/cases/verb-sub-gsub-ssub/0004/experr b/test/cases/verb-sub-gsub-ssub/0004/experr
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/test/cases/verb-sub-gsub-ssub/0004/expout b/test/cases/verb-sub-gsub-ssub/0004/expout
new file mode 100644
index 0000000000..917c3f5ed6
--- /dev/null
+++ b/test/cases/verb-sub-gsub-ssub/0004/expout
@@ -0,0 +1,11 @@
+a   b   i  x          y
+pan pan 1  0.34679014 0.72680286
+Xks pan 2  0.75867996 0.52215111
+wyX wyX 3  0.20460331 0.33831853
+Xks wyX 4  0.38139939 0.13418874
+wyX pan 5  0.57328892 0.86362447
+zXe pan 6  0.52712616 0.49322129
+Xks zXe 7  0.61178406 0.18788492
+zXe wyX 8  0.59855401 0.97618139
+hat wyX 9  0.03144188 0.74955076
+pan wyX 10 0.50262601 0.95261836
diff --git a/test/input/medium.zst b/test/input/medium.zst
new file mode 100644
index 0000000000..f7b5c9a0d7
Binary files /dev/null and b/test/input/medium.zst differ