From: Nathaniel Wesley Filardo Date: Sat, 19 Feb 2022 10:15:45 +0000 (+0000) Subject: Overhaul shell and human escaping X-Git-Url: https://hydra-www.ietfng.org/gitweb/?a=commitdiff_plain;h=291b62b0c4be1da9f2785b904219b5f02ca53e6c;p=csdb Overhaul shell and human escaping Add "cdb-util escape" utility to expose library functions Expand and revise test cases --- diff --git a/cdb b/cdb index 6474daa..418ef5b 100755 --- a/cdb +++ b/cdb @@ -235,6 +235,8 @@ mksubcmd(function(c) :description("Be chatty on stderr about the generated comand stream") c:flag("--extended-escapes") :description("Write control characters in file names with $'...' escapes") + c:flag("--no-human-escape") + :description("Avoid trying to be kind to humans with paths in the logs") argparse_flag_inul(c) argparse_flag_nul(c) c:mutex(inplace, mv) @@ -271,7 +273,16 @@ mksubcmd(function(c) local shell_escape = args.extended_escapes and cdblib.extended_shell_escape or cdblib.posix_shell_escape - local human_escape = cdblib.human_shell_escape + local human_escape = cdblib.extended_shell_escape + if not args.no_human_escape then + local ok, rexlib = pcall(require, "rex_posix") + if not ok then + io.stderr:write("Unable to load locale-aware regex library") + else + os.setlocale("") + human_escape = cdblib.mk_human_shell_escape(rexlib) + end + end local path_crush = args.relative and function(p) plpath.relpath(p, args.relative) end diff --git a/cdb-util b/cdb-util index ab7b0ef..e42caad 100755 --- a/cdb-util +++ b/cdb-util @@ -86,6 +86,41 @@ mksubcmd(function(c) end ) +----------------------------------------------------------------- }}} +--------------------------------------------- Command: escape esc {{{ + +mksubcmd(function(c) + c:name("escape esc") + :description("Escape input records, usually for shells") + c:argument("how"):default("posix") + :description("How to escape lines") + -- TODO: :choices("digest", "posix", "extended", "human") + argparse_flag_nul(c) + argparse_flag_inul(c) + end, 
function(args) + local eol = args.nul and '\0' or '\n' + + local f + do + local h = args.how + if h == "digest" then f = cdblib.escape_gnu_digest + elseif h == "posix" then f = cdblib.posix_shell_escape + elseif h == "extended" then f = cdblib.extended_shell_escape + elseif h == "human" then + os.setlocale("") + local rexlib = require("rex_posix") + f = cdblib.mk_human_shell_escape(rexlib) + else error("Bad escape function selector " .. h) + end + end + + for l in cdblib.iter_lines_or_nul(args.inul)() do + io.write(f(l), eol) + end + end +) + ----------------------------------------------------------------- }}} --------------------------------------------- Top-level executive {{{ diff --git a/cdblib.lua b/cdblib.lua index f57ae17..40e04fc 100644 --- a/cdblib.lua +++ b/cdblib.lua @@ -190,37 +190,51 @@ end ----------------------------------------------------------------- }}} ------------------------------------------- Path escape utilities {{{ +-- This appears to be pretty safe, even in the presence of non-ASCII bytes. +-- That's kind of great and we will use this by default whenever we generate +-- text for a shell. function _M.posix_shell_escape(str) return "'" .. str:gsub("'", "'\"'\"'") .. "'" end -- While POSIX shells understand control characters inside single quotes, they -- are unfriendly to read as such. Some shells have a $'...' escape that can --- process things like \t and \xXX. This uses that instead. Perhaps we should --- have a version that actually uses \t, but, honestly, if you're hitting this --- case you deserve what you get. +-- process things like \t and \xXX. This uses that instead, though always in +-- the \xXX form. local function extended_shell_escape(str) return "'" .. - str:gsub("['%c]", function(c) - return c == "'" and "'\"'\"'" or ("'$'\\x%02x''"):format(c:byte()) + str:gsub("['%G]", function(c) + return c == " " and " " + or c == "'" and "'\"'\"'" + or ("'$'\\x%02x''"):format(c:byte()) end) .. 
"'" end _M.extended_shell_escape = extended_shell_escape -function _M.human_shell_escape(str) - if not str:find("[%c]") then - -- no control characters, and... - if not str:find("'") then - -- no single quotes, so simple enough to just single-quote the thing - return "'" .. str .. "'" - elseif not str:find('["$`\\]') then - -- single quote but no double quote, dollar, backtick, or backslash - return '"' .. str .. '"' +-- Formatting for humans is... more exciting, as we expect these to end up on +-- a screen with no intermediate processing. Astoundingly more subtle. We +-- use posix rexlib. +function _M.mk_human_shell_escape(rexlib) + local nonglyph = rexlib.new("[^[:graph:] ]", rexlib.REG_EXTENDED) + local nonshell = rexlib.new("[^-%',._+:@/ [:alnum:]]", rexlib.REG_EXTENDED) + + return function(str) + if not nonglyph:find(str) then + -- no control characters, and... + if not str:find("'") then + -- no single quotes, so simple enough to just single-quote the thing + return "'" .. str .. "'" + elseif not nonshell:find(str) then + -- single quote but otherwise all double-quoted shell-safe characters + -- (notably, no double quote, dollar, backtick, or backslash, but also + -- no non-ASCII) + return '"' .. str .. '"' + end end - end - -- If none of the special cases apply, just do the full thing - return extended_shell_escape(str) + -- If none of the special cases apply, be overzealous but hopefully safe + return extended_shell_escape(str) + end end ----------------------------------------------------------------- }}} diff --git a/test-util.sh b/test-util.sh index fc2e670..90af33a 100755 --- a/test-util.sh +++ b/test-util.sh @@ -40,5 +40,53 @@ HERE 1 /b HERE +# Test the various shell escapes +ARB=$(echo "a\rb") +cat >${LOG1} <${LOG1} <${LOG2} 2>${LOG3} # Import commands on stdout diff -u - ${LOG2} <