above::
find ${DIR} -type f -print0 | \
- cdb --db ${DB} filterpath --in-path --predicate=out -0 -1 --format '$u$z' > ${DB}.new-files0
- xargs -0 sha512sum > ${DB}.new < ${DB}.new-files0
+ cdb --db ${DB} filterpath --in-path --predicate=out -0 -1 --format '$u$z' | \
+ xargs -0 sha512sum > ${DB}.new
We can then prepare to prune duplicates and add unique files::
- cdb --db ${DB} ingest -1 --prune --inplace --digest-log ${DB}.new2 < ${DB}.new > ${DB}.prune
+ cdb --db ${DB} ingest --prune --inplace --digest-log ${DB}.new2 < ${DB}.new > ${DB}.prune
Add new files to the database with::
cdb --db ${DB} addh < ${DB}.new2
-After reviewing the files to be pruned in ``${DB}.prune``, it can be executed::
+Inspect the pruning commands to be run, and then execute them with::
sh < ${DB}.prune
+
+(If you have, or might have, unusual path names, you may be better served by
+giving ``--prune`` a log file, as in ``--prune=FILE``, rather than using it
+bare. The resulting ``NUL``-terminated list of files can be inspected with
+``cdb-util escape human -0`` and run with ``xargs -0 -- rm --``.)
:description("Don't crush targets to basename; trim given prefix instead")
local mv = c:flag("--move")
:description("Issue move, not copy, commands for new files")
- c:option("--prune-log")
- :argname("file")
- :description("Log of files with colliding digests; NUL-separated")
+ c:option("--prune")
+ :argname("LOG")
+ :default(""):defmode("a")
+ :description("Generate rm for, or log w/ NULs, files with colliding digests")
c:option("--digest-log")
:argname("file")
:description("Write novel digest records to the indicated file")
end
end
+ local shell_escape =
+ args.extended_escapes and cdblib.extended_shell_escape
+ or cdblib.posix_shell_escape
+
local function log_prune() end
- if args.prune_log then
- local prunef = assert(io.open(args.prune_log, "wb"))
- function log_prune(p)
- return prunef:write(p, "\0")
+ if args.prune then
+ if args.prune == "" then
+ function log_prune(p)
+ return io.write("rm -- ", shell_escape(p), eol)
+ end
+ else
+ local prunef = assert(io.open(args.prune, "wb"))
+ function log_prune(p)
+ return prunef:write(p, "\0")
+ end
end
end
local sth_path_by_hash = assert(sql_mk_pathid_find_by_hash(dbh))
local sth_superseder_by_hash = assert(sql_mk_superseder_find_by_hash(dbh))
- local shell_escape =
- args.extended_escapes and cdblib.extended_shell_escape
- or cdblib.posix_shell_escape
local human_escape = cdblib.extended_shell_escape
if not args.no_human_escape then
local ok, rexlib = pcall(require, "rex_posix")
# Test 'ingest'
cat >${LOG1} <<HERE
5 new
-\\6 new\\\\esc
9 twinned copy 🎵
+\\6 new\\\\esc
4 ti'cky copy
4 ti'cky copy with \$extra
HERE
-${LUA} ./cdb --db ${DB1} ingest --target x --prune-log=${LOG4} --verbose <${LOG1} >${LOG2} 2>${LOG3}
+${LUA} ./cdb --db ${DB1} ingest --target x --prune=${LOG4} --verbose <${LOG1} >${LOG2} 2>${LOG3}
# Import commands on stdout
diff -u - ${LOG2} <<HERE
cp -- 'new' 'x/new'
# Log on stderr
diff -u - ${LOG3} <<HERE
Import 'new' to 'x/new'
-Import 'new\\esc' to 'x/new\\esc'
Import hash 9 from path 'twinned copy 🎵' already in database at 'twinned'
+Import 'new\\esc' to 'x/new\\esc'
Import hash 4 from path "ti'cky copy" already in database at "ti'cky"
Import hash 4 from path 'ti'"'"'cky copy with \$extra' already in database at "ti'cky"
HERE
ti'cky copy with \$extra
HERE
+# Again, with intermixed pruning commands
+${LUA} ./cdb --db ${DB1} ingest --target x --prune <${LOG1} >${LOG2} 2>${LOG3}
+diff -u - ${LOG2} <<HERE
+cp -- 'new' 'x/new'
+rm -- 'twinned copy 🎵'
+cp -- 'new\\esc' 'x/new\\esc'
+rm -- 'ti'"'"'cky copy'
+rm -- 'ti'"'"'cky copy with \$extra'
+HERE
+diff -u /dev/null ${LOG3}
+
+# With move
diff -u - <(${LUA} ./cdb --db ${DB1} ingest --move --target x --digest-log ${LOG2} <${LOG1}) <<HERE
mv -- 'new' 'x/new'
mv -- 'new\\esc' 'x/new\\esc'
diff -u /dev/null <(${LUA} ./cdb --db ${DB1} gc)
# And without actually doing the import
-${LUA} ./cdb --db ${DB1} ingest --prune-log=${LOG3} --verbose >${LOG1} 2>${LOG2} \
+${LUA} ./cdb --db ${DB1} ingest --prune=${LOG3} --verbose >${LOG1} 2>${LOG2} \
<<<'5 new'$'\n''4 copy'
# No output stdout
diff -u /dev/null ${LOG1}
diff -u /dev/null <(${LUA} ./cdb --db ${DB1} gc)
# Test ingest with supers
-${LUA} ./cdb --db ${DB1} ingest --target x --prune-log=${LOG3} --digest-log=${LOG4} --verbose \
+${LUA} ./cdb --db ${DB1} ingest --target x --prune=${LOG3} --digest-log=${LOG4} --verbose \
>${LOG1} 2>${LOG2} <<HERE
0 won't fix super
11 pre-ordinary again