From: Nathaniel Wesley Filardo Date: Sat, 19 Feb 2022 13:01:41 +0000 (+0000) Subject: Further changes to path escape handling in ingest X-Git-Url: https://hydra-www.ietfng.org/gitweb/?a=commitdiff_plain;h=c121dde23da117695b75aeec56e0fadc051d42b4;p=csdb Further changes to path escape handling in ingest --- diff --git a/README.rst b/README.rst index ebacacf..fa06bf7 100644 --- a/README.rst +++ b/README.rst @@ -222,8 +222,8 @@ recompute digests. its mechanism is somewhat crude and not always great for human consumption). However, POSIX shells are willing to forgive control characters in quoted strings while humans and terminals are more likely to make a mess of things. -The ``--extended-escapes`` flag will cause ``ingest`` to be more aggressive -about quoting such characters, making them overtly visible. +The ``--escape {posix,extended,human}`` option will change how ``ingest`` quotes +such characters. .. _injest_reflex: diff --git a/cdb b/cdb index d277f20..29c77b2 100755 --- a/cdb +++ b/cdb @@ -234,8 +234,10 @@ mksubcmd(function(c) :description("Record new files in the digest log as they are") c:flag("--verbose") :description("Be chatty on stderr about the generated comand stream") - c:flag("--extended-escapes") - :description("Write control characters in file names with $'...' escapes") + c:option("--escape") + -- TODO: :choices("posix", "extended", "human") + :default("posix") + :description("Control how special characters in paths are escaped") c:flag("--no-human-escape") :description("Avoid trying to be kind to humans with paths in the logs") argparse_flag_inul(c) @@ -257,9 +259,26 @@ mksubcmd(function(c) end end - local shell_escape = - args.extended_escapes and cdblib.extended_shell_escape - or cdblib.posix_shell_escape + local human_escape = cdblib.extended_shell_escape + if not args.no_human_escape then + local ok, rexlib = pcall(require, "rex_posix") + if not ok then + io.stderr:write("Unable to load locale-aware regex library") + else + os.setlocale("") + human_escape = cdblib.mk_human_shell_escape(rexlib) + end + end + + local shell_escape + do + local h = args.escape + if h == "posix" then shell_escape = cdblib.posix_shell_escape + elseif h == "extended" then shell_escape = cdblib.extended_shell_escape + elseif h == "human" then shell_escape = human_escape + else error("Bad escape function selector " .. h) + end + end local function log_prune() end if args.prune then @@ -281,17 +300,6 @@ mksubcmd(function(c) local sth_path_by_hash = assert(sql_mk_pathid_find_by_hash(dbh)) local sth_superseder_by_hash = assert(sql_mk_superseder_find_by_hash(dbh)) - local human_escape = cdblib.extended_shell_escape - if not args.no_human_escape then - local ok, rexlib = pcall(require, "rex_posix") - if not ok then - io.stderr:write("Unable to load locale-aware regex library") - else - os.setlocale("") - human_escape = cdblib.mk_human_shell_escape(rexlib) - end - end - local path_crush = args.relative and function(p) plpath.relpath(p, args.relative) end or plpath.basename diff --git a/test.sh b/test.sh index f711351..290d8a4 100755 --- a/test.sh +++ b/test.sh @@ -196,7 +196,7 @@ HERE # And with some rude characters in the path name diff -u - <(${LUA} ./cdb --db ${DB1} ingest --target x <<<'5 rude'$'\r''new') \ <<<"cp -- 'rude"$'\r'"new' 'x/rude"$'\r'"new'" -diff -u - <(${LUA} ./cdb --db ${DB1} ingest --target x --extended-escapes <<<'5 rude'$'\t''new') \ +diff -u - <(${LUA} ./cdb --db ${DB1} ingest --target x --escape extended <<<'5 rude'$'\t''new') \ <<<"cp -- 'rude'\$'\\x09''new' 'x/rude'\$'\\x09''new'" diff -u - <(${LUA} ./cdb --db ${DB1} ingest --target x --verbose 2>&1 <<<'9 rude'$'\r''copy') \ <<<"Import hash 9 from path 'rude'$'\\x0d''copy' already in database at 'twinned'"