:description("Be chatty on stderr about the generated comand stream")
c:flag("--extended-escapes")
:description("Write control characters in file names with $'...' escapes")
+ c:flag("--no-human-escape")
+ :description("Avoid trying to be kind to humans with paths in the logs")
argparse_flag_inul(c)
argparse_flag_nul(c)
c:mutex(inplace, mv)
local shell_escape =
args.extended_escapes and cdblib.extended_shell_escape
or cdblib.posix_shell_escape
- local human_escape = cdblib.human_shell_escape
+ local human_escape = cdblib.extended_shell_escape
+ if not args.no_human_escape then
+ local ok, rexlib = pcall(require, "rex_posix")
+ if not ok then
+ io.stderr:write("Unable to load locale-aware regex library")
+ else
+ os.setlocale("")
+ human_escape = cdblib.mk_human_shell_escape(rexlib)
+ end
+ end
local path_crush =
args.relative and function(p) plpath.relpath(p, args.relative) end
end
)
+----------------------------------------------------------------- }}}
+--------------------------------------------- Command: escape esc {{{
+
+mksubcmd(function(c)
+    -- Command-line surface: one positional selector plus the two shared
+    -- record-separator flags.
+    c:name("escape esc")
+      :description("Escape input records, usually for shells")
+    c:argument("how")
+      :default("posix")
+      :description("How to escape lines")
+    -- TODO: :choices("digest", "posix", "extended", "human")
+    argparse_flag_nul(c)
+    argparse_flag_inul(c)
+  end,
+  function(args)
+    -- Resolve the selector string to an escaping function.  The "human"
+    -- variant is built on demand: it switches to the environment's locale
+    -- and then needs the rex_posix module.
+    local function select_escaper(how)
+      if how == "digest" then return cdblib.escape_gnu_digest end
+      if how == "posix" then return cdblib.posix_shell_escape end
+      if how == "extended" then return cdblib.extended_shell_escape end
+      if how == "human" then
+        os.setlocale("")
+        local rexlib = require("rex_posix")
+        return cdblib.mk_human_shell_escape(rexlib)
+      end
+      error("Bad escape function selector " .. how)
+    end
+
+    local escape = select_escaper(args.how)
+
+    -- Output records end in NUL when --nul was given, newline otherwise.
+    local terminator = '\n'
+    if args.nul then terminator = '\0' end
+
+    for record in cdblib.iter_lines_or_nul(args.inul)() do
+      io.write(escape(record), terminator)
+    end
+  end
+)
+
----------------------------------------------------------------- }}}
--------------------------------------------- Top-level executive {{{
----------------------------------------------------------------- }}}
------------------------------------------- Path escape utilities {{{
+-- This appears to be pretty safe, even in the presence of non-ASCII bytes.
+-- That's kind of great and we will use this by default whenever we generate
+-- text for a shell.
function _M.posix_shell_escape(str)
  -- Wrap str in single quotes.  A single quote cannot appear inside a
  -- single-quoted word, so each one is rewritten as: close the quote, emit a
  -- double-quoted single quote, reopen the quote.
  local body = str:gsub("'", "'\"'\"'")
  return "'" .. body .. "'"
end
-- While POSIX shells understand control characters inside single quotes, they
-- are unfriendly to read as such. Some shells have a $'...' escape that can
--- process things like \t and \xXX. This uses that instead. Perhaps we should
--- have a version that actually uses \t, but, honestly, if you're hitting this
--- case you deserve what you get.
+-- process things like \t and \xXX. This uses that instead, though always in
+-- the \xXX form.
local function extended_shell_escape(str) -- single-quote str, hex-escaping selected bytes via $'...'
return "'" ..
- str:gsub("['%c]", function(c)
- return c == "'" and "'\"'\"'" or ("'$'\\x%02x''"):format(c:byte())
+ str:gsub("['%G]", function(c) -- visit each single quote and each character outside %g
+ return c == " " and " " -- %G also matches a plain space; pass it through unchanged
+ or c == "'" and "'\"'\"'" -- close the quote, emit a double-quoted ', reopen
+ or ("'$'\\x%02x''"):format(c:byte()) -- close the quote, emit $'\xXX', reopen
end) .. "'"
end
_M.extended_shell_escape = extended_shell_escape
-function _M.human_shell_escape(str)
- if not str:find("[%c]") then
- -- no control characters, and...
- if not str:find("'") then
- -- no single quotes, so simple enough to just single-quote the thing
- return "'" .. str .. "'"
- elseif not str:find('["$`\\]') then
- -- single quote but no double quote, dollar, backtick, or backslash
- return '"' .. str .. '"'
+-- Formatting for humans is more subtle: this output lands on a screen with no
+-- intermediate processing.  Takes a POSIX regex module (needs new() and
+-- REG_EXTENDED) and returns the escaping function(str) -> string.
+function _M.mk_human_shell_escape(rexlib)
+ local nonglyph = rexlib.new("[^[:graph:] ]", rexlib.REG_EXTENDED) -- neither graphic nor a space
+ local nonshell = rexlib.new("[^-%',._+:@/ [:alnum:]]", rexlib.REG_EXTENDED) -- outside the double-quote-safe set
+
+ return function(str)
+ if not nonglyph:find(str) then
+ -- every character is graphic or a space, and...
+ if not str:find("'") then
+ -- no single quotes, so plain single-quoting is enough
+ return "'" .. str .. "'"
+ elseif not nonshell:find(str) then
+ -- single quote present, but nothing outside the double-quote-safe set
+ -- (no double quote, dollar, backtick, or backslash).  NOTE(review):
+ -- [:alnum:] follows the locale, so "no non-ASCII" may not hold -- confirm
+ return '"' .. str .. '"'
+ end
end
- end
- -- If none of the special cases apply, just do the full thing
- return extended_shell_escape(str)
+ -- Otherwise fall back to the \xXX form: overzealous but hopefully safe
+ return extended_shell_escape(str)
+ end
end
----------------------------------------------------------------- }}}
# Fixture: ingest input written to LOG1, one "<hash> <path>" record per line
cat >${LOG1} <<HERE
5 new
\\6 new\\\\esc
-9 twinned copy
+9 twinned copy 🎵
4 ti'cky copy
4 ti'cky copy with \$extra
HERE
# Run ingest on the fixture: stdout -> LOG2, stderr -> LOG3, prune log -> LOG4
${LUA} ./cdb --db ${DB1} ingest --target x --prune-log=${LOG4} --verbose <${LOG1} >${LOG2} 2>${LOG3}
# Import commands on stdout (LOG2) must match exactly
diff -u - ${LOG2} <<HERE
-cp 'new' 'x/new'
-cp 'new\\esc' 'x/new\\esc'
+cp -- 'new' 'x/new'
+cp -- 'new\\esc' 'x/new\\esc'
HERE
# Verbose log on stderr (LOG3) must match exactly
diff -u - ${LOG3} <<HERE
Import 'new' to 'x/new'
Import 'new\\esc' to 'x/new\\esc'
-Import hash 9 from path 'twinned copy' already in database at 'twinned'
+Import hash 9 from path 'twinned copy 🎵' already in database at 'twinned'
Import hash 4 from path "ti'cky copy" already in database at "ti'cky"
Import hash 4 from path 'ti'"'"'cky copy with \$extra' already in database at "ti'cky"
HERE
# Prune log (LOG4): NULs are turned into newlines before comparing
diff -u - <(tr '\000' '\n' <${LOG4}) <<HERE
-twinned copy
+twinned copy 🎵
ti'cky copy
ti'cky copy with \$extra
HERE