From 1a68389c099b7c4542f69289e77fd161ce20aa11 Mon Sep 17 00:00:00 2001 From: Nathaniel Wesley Filardo Date: Sat, 19 Feb 2022 11:00:16 +0000 Subject: [PATCH] Reconcile README and ingest command implementation --- README.rst | 13 +++++++++---- cdb | 28 ++++++++++++++++++---------- test.sh | 22 +++++++++++++++++----- 3 files changed, 44 insertions(+), 19 deletions(-) diff --git a/README.rst b/README.rst index 97f4ecf..ebacacf 100644 --- a/README.rst +++ b/README.rst @@ -237,17 +237,22 @@ presence elsewhere in the database. We can enumerate files not tracked using above:: find ${DIR} -type f -print0 | \ - cdb --db ${DB} filterpath --in-path --predicate=out -0 -1 --format '$u$z' > ${DB}.new-files0 - xargs -0 sha512sum > ${DB}.new < ${DB}.new-files0 + cdb --db ${DB} filterpath --in-path --predicate=out -0 -1 --format '$u$z' | \ + xargs -0 sha512sum > ${DB}.new We can then prepare to prune duplicates and add unique files:: - cdb --db ${DB} ingest -1 --prune --inplace --digest-log ${DB}.new2 < ${DB}.new > ${DB}.prune + cdb --db ${DB} ingest --prune --inplace --digest-log ${DB}.new2 < ${DB}.new > ${DB}.prune Add new files to the database with:: cdb --db ${DB} addh < ${DB}.new2 -After reviewing the files to be pruned in ``${DB}.prune``, it can be executed:: +Inspect the pruning commands to be run, and then execute them with:: sh < ${DB}.prune + +(If you have, or might have, unusual path names, you may be better served with +``--prune=LOG`` rather than bare ``--prune``. The resulting ``NUL``-terminated list +of files can be inspected with ``cdb-util escape human -0`` and run with ``xargs +-0 -- rm --``.) 
diff --git a/cdb b/cdb index 418ef5b..d277f20 100755 --- a/cdb +++ b/cdb @@ -223,9 +223,10 @@ mksubcmd(function(c) :description("Don't crush targets to basename; trim given prefix instead") local mv = c:flag("--move") :description("Issue move, not copy, commands for new files") - c:option("--prune-log") - :argname("file") - :description("Log of files with colliding digests; NUL-separated") + c:option("--prune") + :argname("LOG") + :default(""):defmode("a") + :description("Generate rm for, or log w/ NULs, files with colliding digests") c:option("--digest-log") :argname("file") :description("Write novel digest records to the indicated file") @@ -256,11 +257,21 @@ mksubcmd(function(c) end end + local shell_escape = + args.extended_escapes and cdblib.extended_shell_escape + or cdblib.posix_shell_escape + local function log_prune() end - if args.prune_log then - local prunef = assert(io.open(args.prune_log, "wb")) - function log_prune(p) - return prunef:write(p, "\0") + if args.prune then + if args.prune == "" then + function log_prune(p) + return io.write("rm -- ", shell_escape(p), eol) + end + else + local prunef = assert(io.open(args.prune, "wb")) + function log_prune(p) + return prunef:write(p, "\0") + end end end @@ -270,9 +281,6 @@ mksubcmd(function(c) local sth_path_by_hash = assert(sql_mk_pathid_find_by_hash(dbh)) local sth_superseder_by_hash = assert(sql_mk_superseder_find_by_hash(dbh)) - local shell_escape = - args.extended_escapes and cdblib.extended_shell_escape - or cdblib.posix_shell_escape local human_escape = cdblib.extended_shell_escape if not args.no_human_escape then local ok, rexlib = pcall(require, "rex_posix") diff --git a/test.sh b/test.sh index 9e087d4..f711351 100755 --- a/test.sh +++ b/test.sh @@ -131,12 +131,12 @@ diff -u - <(${LUA} ./cdb --db ${DB1} mappath <<<"ordinary") <<<"1 ordinary" # Test 'ingeset' cat >${LOG1} <${LOG2} 2>${LOG3} +${LUA} ./cdb --db ${DB1} ingest --target x --prune=${LOG4} --verbose <${LOG1} >${LOG2} 2>${LOG3} # Import 
commands on stdout diff -u - ${LOG2} <${LOG2} 2>${LOG3} +diff -u - ${LOG2} <${LOG1} 2>${LOG2} \ +${LUA} ./cdb --db ${DB1} ingest --prune=${LOG3} --verbose >${LOG1} 2>${LOG2} \ <<<'5 new'$'\n''4 copy' # No output stdout diff -u /dev/null ${LOG1} @@ -291,7 +303,7 @@ diff -u /dev/null <(${LUA} ./cdb --db ${DB1} maph 11) diff -u /dev/null <(${LUA} ./cdb --db ${DB1} gc) # Test ingest with supers -${LUA} ./cdb --db ${DB1} ingest --target x --prune-log=${LOG3} --digest-log=${LOG4} --verbose \ +${LUA} ./cdb --db ${DB1} ingest --target x --prune=${LOG3} --digest-log=${LOG4} --verbose \ >${LOG1} 2>${LOG2} <