From 01432191390734ac90a48858d91ca70d4c09fa7a Mon Sep 17 00:00:00 2001 From: Nathaniel Wesley Filardo Date: Sun, 13 Feb 2022 01:59:29 +0000 Subject: [PATCH] Far-reaching overhaul * Ingest command * Library tweaks * Documentation fixes and updates * A very simple test script that, despite its simplicity, catches bugs * First stabs at superseder work --- .gitignore | 1 + README.rst | 172 +++++---- cdb | 906 +++++++++++++++++++++++++++++++++++---------- cdb-digestrelative | 3 +- cdblib.lua | 141 +++++-- test.sh | 350 +++++++++++++++++ 6 files changed, 1286 insertions(+), 287 deletions(-) create mode 100755 test.sh diff --git a/.gitignore b/.gitignore index 1377554..5810156 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ *.swp +tmp/ diff --git a/README.rst b/README.rst index 07c7cbe..d0d4cc7 100644 --- a/README.rst +++ b/README.rst @@ -19,6 +19,17 @@ like ``find`` and the GNU coreutils digest programs (e.g. ``sha512sum``), delegating details of filesystem traversal and choice of hash and so on to the user. +Dependencies +############ + +This program requires... + +* either the Lua 5.3 interpreter or luajit, + +* the Lua ``argparse`` and ``penlight`` libraries, and + +* ``lua-dbi`` and its ``lua-dbi-sqlite3`` driver. + Supported Operations #################### @@ -43,7 +54,7 @@ Or, for all files under a path:: If we have a pile of digest files already, each of which contains digests of paths relative to its location, we can generate a database, ``${DB2}`` from them -with the assistance of the ``digestrelative`` tool:: +with the assistance of the ``cdb-digestrelative`` tool:: find ${DIR} -type f -name SHA512SUMS -print0 | cdb-digestrelative --inul | cdb --db ${DB} addh @@ -69,28 +80,51 @@ because the former can be more informative in the case of mismatching digests digest). 
If it's easier to have the database generate the set of files, that can be done:: - cdb --db ${DB} look \* --no-hashes --unescape --nul | xargs -0 sha512sum | cdb --db ${DB} verh + cdb --db ${DB} look \* --format '$u$z' --nul | xargs -0 sha512sum | cdb --db ${DB} verh -Add Checksums For Missing Files -=============================== +Add Missing Checksums +===================== -We can quickly construct a "just paths" database, which associates all paths -with a single digest, from the current state of the file system as follows:: +We can augment a database of files by filtering a list of files we have to +exclude the list of files we know about. If, however, there is a possibility +that some of these files are duplicates of ones already in the database, you may +be better off using ``ingest`` `reflexively <injest_reflex_>`_. - cdb --db ${JPDB} init - find ${DIR} -type f -printf "0 %p\\0" | ./cdb --db ${JPDB} addh --inul +Using filterpath +---------------- -This database may not seem very useful, but when combined with ``cdb diff`` we -can quickly find all paths whose checksums are unknown to the database:: + +We can generate the list of files we don't know about using ``find`` and +``cdb filterpath``:: + + find ${DIR} -type f -print0 | \ + cdb --db ${DB} filterpath --inul --format '$u$z' --nul > ${DB}.new-files0 - cdb --db ${DB} diff ${JPDB} --flavor=path --which=super +.. _xargs_sha: We can then script computing those files' checksums and adding the new reports to the database:: - cdb --db ${DB} diff ${JPDB} --flavor=path --which=super --no-headers --nul --unescape > ${JPDB}.new-files0 - xargs -0 sha512sum > ${JPDB}.new < ${JPDB}.new-files0 - cdb --db ${DB} addh < ${JPDB}.new + xargs -0 sha512sum > ${DB}.new < ${DB}.new-files0 + cdb --db ${DB} addh < ${DB}.new + +Using diff +---------- + +.. 
_just_paths: + +For a different approach, we can quickly construct a "just paths" database, +which associates all paths with a single digest, from the current state of the +file system as follows:: + + cdb --db ${JPDB} init + find ${DIR} -type f -printf "0 %p\\0" | cdb --db ${JPDB} addh --inul + +This database may not seem very useful, but when combined with ``cdb --db diff`` we +can quickly find all paths whose checksums are unknown to the database:: + + cdb --db ${DB} diff ${JPDB} --flavor=path --which=super --format '$u$z' --nul > ${DB}.new-files0 + +And then proceed as `above <xargs_sha_>`_. From Another Database --------------------- @@ -99,7 +133,7 @@ If we have another database that knows digests for our files, rather than computing digests again, we can extract checksums from ``${DB2}`` and install them into ``${DB}``:: - cdb --db ${DB2} mapp --inul < ${JPDB}.new-files0 | cdb --db ${DB} addh + cdb --db ${DB2} look --inul < ${DB}.new-files0 | cdb --db ${DB} addh Responding to File Moves ======================== @@ -108,7 +142,7 @@ Armed with a "just paths" database as per the above, we can then direct the database to prune tracked paths not in the "just paths" database if the hashes are observed elsewhere:: - cdb --db ${DB} diff ${JPDB} --flavor=path --which=sub --no-headers --nul --unescape > ${JPDB}.missing-files0 + cdb --db ${DB} diff ${JPDB} --flavor=path --which=sub --no-headers --format '$u$z' --nul > ${JPDB}.missing-files0 cdb --db ${DB} domv --inul < ${JPDB}.missing-files0 cdb --db ${DB} gc > ${DB}.gc sqlite3 ${DB} < ${DB}.gc @@ -138,78 +172,82 @@ given hash. Add Superseder ============== -Indicate that some file contents are to be considered a lesser version of some -other contents. - -.. 
TODO +By Existing Paths +----------------- -Report Novelty -============== +Indicate that some file contents are to be considered a lesser version of some +other contents:: -Given a path, measure its checksum and report if it does not match, and has not -been superseded by, any observation already recorded in the database. + cdb --db ${DB} addsuper /old/path /new/path +After this command is run, ``domv`` will be willing to remove the ``/old/path`` +entry from the database. .. TODO -.. This command would be useful for ingesting things into a library or pruning - collections of files outside the library. - --------------------------------------------------------------------------------- - -Example Uses -############ - -A photo library -=============== - -Suppose ``/mnt/photos`` contains a collection of photos. We might want to... - -* measure all the files in that directory, flagging new and updated contents:: - - $ cksdb /mnt/photos/.cksdb observe /mnt/photos +By Hashes +--------- -* measure all the files in that directory, automatically updating the database:: +Superseder records can also be added from ``stdin`` using ``addsuperhash`` (or +``addsh``). This command reads in lines of the form :: - $ cksdb /mnt/photos/.cksdb observe --new --changed /mnt/photos + old-digest new-digest notes -* report duplicates anywhere in the library:: +The ``notes`` field extends to the end of the line; if newlines are desired in +the recorded notes, use ``--inul`` and separate records by NUL bytes. - $ cksdb /mnt/photos/.cksdb ls --duplicate +Ingest +====== -* report files in a particular directory that also exist anywhere else in the - library:: +Given a digest stream, partition it into hashes already in the database and +hashes novel to the database. For the former, optionally generate ``rm`` +commands, and for the latter, optionally generate ``mv`` or ``cp`` commands +to import into the library. 
Novel hashes, and their new paths, may optionally +be recorded as well, to be subsequently added to the database:: - $ cksdb /mnt/photos/.cksdb ls --duplicate /mnt/photos/dir1 + find /source/path -type f -exec sha512sum {} \+ | \ + cdb --db ${DB} ingest --target /new/path --prune -* restrict the search for duplication to another direcotry:: +This will produce a stream of shell commands to copy files given by ``find`` +into the ``/new/path`` directory (using their basename therein). Passing +``--move`` generates move rather than copy commands. Passing ``--prune`` +additionally issues ``rm`` commands for *source* files whose hashes collide with +something already in the database. - $ cksdb /mnt/photos/.cksdb ls --duplicate /mnt/photos/dir1 \ - --also /mnt/photos/dir2 +The ``--digest-log FILE`` option will cause ``ingest`` to write to FILE every +new digest encountered in the stream, associated with its new name in +``/new/path``. This can then be fed back through ``addhash`` without needing to +recompute digests. -* explicitly acknowledge a deletion by removing observations of it:: +``ingest`` knows how to quote paths for safe handling by POSIX shells (though +its mechanism is somewhat crude and not always great for human consumption). +However, POSIX shells are willing to forgive control characters in quoted +strings while humans and terminals are more likely to make a mess of things. +The ``--extended-escapes`` flag will cause ``ingest`` to be more aggressive +about quoting such characters, making them overtly visible. - $ cksdb /mnt/photos/.cksdb rm /mnt/photos/filename +.. 
_injest_reflex: -* indicate that the last observed content of ``foo.jpg`` is superseded by the - last observed content of ``foo.raw``:: +Reflexive Use of Ingest +----------------------- - $ cksdb /mnt/photos/.cksdb supersede /mnt/photos/foo.jpg /mnt/photos/foo.raw +The ``ingest`` command can also be used "reflexively" on the managed collection +of files to either add files that are not tracked or prune files that have +presence elsewhere in the database. We can enumerate files not tracked using +``filterpath`` and compute their checksums as we did in `Add Missing Checksums`_ +above:: -* import files from outside the library, say, in ``/mnt/sdcard``, skipping - duplicate and superseded files and removing all examined files (that is, - imported, duplicate, and superseded; ``--harvest``):: + find ${DIR} -type f -print0 | \ + cdb --db ${DB} filterpath --in-path --predicate=out -0 -1 --format '$u$z' > ${DB}.new-files0 + xargs -0 sha512sum > ${DB}.new < ${DB}.new-files0 - $ cksdb /mnt/photos/.cksdb import --harvest /mnt/photos/newdir /mnt/sdcard +We can then prepare to prune duplicates and add unique files:: -* import from another database:: + cdb --db ${DB} ingest -1 --prune --inplace --digest-log ${DB}.new2 < ${DB}.new > ${DB}.prune - $ cksdb /mnt/photos/.cksdb import-db /mnt/oldphotos/.cksdb +Add new files to the database with:: -Cross-Database Operations -========================= + cdb --db ${DB} addh < ${DB}.new2 -Compute violations of set-theoretic relationships between a database and the -union of one or more other databases:: +After reviewing the files to be pruned in ``${DB}.prune``, it can be executed:: - $ cksdb /mnt/photos/.cksdb is-subset /mnt/backups/photos/.cksdb - $ cksdb /mnt/photos/.cksdb is-superset /mnt/backups/photos/.cksdb + sh < ${DB}.prune diff --git a/cdb b/cdb index c8040fd..5a24ada 100755 --- a/cdb +++ b/cdb @@ -1,13 +1,19 @@ -#!/usr/bin/env lua5.3 +#!/usr/bin/env luajit +-- Should also be executable under lua5.3, since we sit in the 
intersection of +-- the two languages and our dependencies do too or are available in both +-- environments. --------------------------------------------------------- Imports {{{ local argparse = require "argparse" local dbi = require "DBI" +local plapp = require "pl.app" local plpath = require "pl.path" local plstringx = require "pl.stringx" local pltablex = require "pl.tablex" +local pltext = require "pl.text" +plapp.require_here() local cdblib = require "cdblib" ----------------------------------------------------------------- }}} @@ -27,6 +33,12 @@ local function sql_run_one(sth, ...) return sth:fetch() end +local function sql_run_one_x(sth, ...) + local res, err = sql_run_one(sth, ...) + if res == false and err ~= nil then error(err) end + return res +end + ----------------------------------------------------------------- }}} -------------------------------------------------- SQL statements {{{ @@ -35,38 +47,127 @@ local function sql_mk_path_upsert(dbh) ON CONFLICT DO UPDATE SET path = path RETURNING pathid]]) end -local function sql_mk_path_find(dbh) +local function sql_mk_pathid_find(dbh) return dbh:prepare([[SELECT pathid FROM paths WHERE path = ?]]) end +local function sql_mk_pathid_find_by_hash(dbh) + return dbh:prepare([[SELECT path + FROM path_hash NATURAL JOIN paths NATURAL JOIN hashes WHERE hash = ?]]) +end + local function sql_mk_hash_upsert(dbh) return dbh:prepare([[INSERT INTO hashes (hash) VALUES (?) 
ON CONFLICT DO UPDATE SET hash = hash RETURNING hashid]]) end -local function sql_mk_hash_find(dbh) +local function sql_mk_hashid_find(dbh) return dbh:prepare([[SELECT hashid FROM hashes WHERE hash = ?]]) end -local function sql_mk_path_find_by_hash(dbh) - return dbh:prepare([[SELECT path - FROM path_hash NATURAL JOIN paths NATURAL JOIN hashes WHERE hash = ?]]) +local function sql_mk_hashid_find_by_path(dbh) + return dbh:prepare( + [[SELECT hashid FROM path_hash NATURAL JOIN paths WHERE path = ?]]) +end + +local function sql_mk_superseder_find_by_hash(dbh) + return dbh:prepare([[SELECT supersederid, note + FROM hash_hash_superseders + JOIN hashes AS o ON o.hashid == oldid + WHERE o.hash == ? + ]]) +end + +local function sql_mk_superseder_find_hash_by_hash(dbh) + return dbh:prepare([[SELECT n.hash AS newhash, note + FROM hash_hash_superseders + JOIN hashes AS n ON n.hashid == newid + JOIN hashes AS o ON o.hashid == oldid + WHERE o.hash == ? + ]]) +end + +----------------------------------------------------------------- }}} +----------------------------------------------- Command utilities {{{ + +local function iter_gnu_digest_stderr(baseiter) + local errcb = function(line) + io.stderr:write("Bad line: ", line, "\n") + return true -- continue iteration + end + return cdblib.iter_gnu_digest(errcb, baseiter) +end + +local function mk_progress_pair(fn) + local progeach = function() end + local progfin = function() end + if fn then + local f = assert(io.open(fn, "w")) + local n = 0 + progeach = function(i) + local o = n + i = i or 1 + n = n + i + if (n % 256) + i >= 256 then + f:write(("Processed %d records\r"):format(n)); f:flush() + end + end + progfin = function() + f:write(("Processed %d records\n"):format(n)) + end + end + return progeach, progfin end ----------------------------------------------------------------- }}} ------------------------------------------------- Argparse, part 1 {{{ +---------------------------------------------- Argparse utilities {{{ + 
+local function argparse_flag_progress(c) + -- This is a bit of a mess. We want... + -- nothing "/dev/fd/1" (the :init on the positive side) + -- --progress "/dev/fd/2" (the :default on the positive side) + -- --progress=x x (the value given) + -- --no-progress false (the result of store_false on the "no" side) + local nf = c:flag("--no-progress") + :target("progress") + :action("store_false") + :description("Suppress progress reporting") + local pf = c:option("--progress") + :args(1) + :hidden(true) + :init("/dev/fd/1") + :default("/dev/fd/2"):defmode("a") + :description("Show progress") + c:mutex(nf, pf) -- applies only to overtly given forms, not defaults; yay! + return pf +end + +local function argparse_flag_nul(c) + return c:flag("--nul -0") + :description("NUL-terminate output records") + :default(false) +end + +local function mk_default_render_template() + return pltext.Template("$e$h $f$z") +end local function argparse_for_render(c) c:flag("--unescape") - :description("Do not escape the filenames; ambiguous without --nul") + :description("Do not escape filenames ($f is $u); likely use --nul, too") :default(false) c:flag("--nul -0") - :description("NUL-terminate lines rather than newline") + :description("NUL-terminate records ($z is NUL rather than newline)") :default(false) + c:option("--format") + :description("Output format specifier") + :default("$e$h $f$z") end -local function renderers_for(args) - return cdblib.renderers_for(args.nul, args.unescape) +local function renderer_for(args) + return cdblib.renderer_for(args.nul, args.unescape, + args.format and pltext.Template(args.format) + or mk_default_render_template()) end local function argparse_flag_inul(c) @@ -75,65 +176,195 @@ local function argparse_flag_inul(c) :default(false) end -local function argparse_no_hashes(c) - c:flag("--no-hashes") - :description("Elide hashes in output; no leading space with --unescape") -end - local function argparse_for_db_filter(c) 
c:option("--predicate"):default("in") -- TODO: :choices({"in", "out"}) - argparse_no_hashes(c) argparse_for_render(c) end +local function argparse_opt_graft(c) + return c:option("--graft") + :default(""):show_default(false) + :description("Graft a prefix to input file names") +end + +----------------------------------------------------------------- }}} +-------------------- Argparse Globals and Command Grouping part 1 {{{ + local argp = argparse("cdb", "checksum database tool") --- global options -argp:option("--database --db") +-- global options must come before commands, and must be options rather than +-- arguments if we want --help to do the right thing, sadly. +argp:option("--db --database") + :target("database") :args(1) :description("Indicate primary checksum database") +-- grouping logic, part 1. Sadly, this needs to run "all at once" but we want +-- to define our commands incrementally! local argp_groups = {} local function argp_group(gname, cmd) argp_groups[gname] = argp_groups[gname] or {} table.insert(argp_groups[gname], cmd) end +-- the workhorse for our subcommands below. 
local function mksubcmd(cmdinit, body) local cmd = argp:command() - cmdinit(cmd) cmd:action(function(args, name) - args.command = name - args.command_fn = body + args._command = name + args._command_fn = body end) + cmdinit(cmd) return cmd end ----------------------------------------------------------------- }}} ---------------------------------------------------- Command: init {{{ +------------------------------------------------- Ingest commands {{{ +---------------------------------------------- Command: ingest in {{{ + +mksubcmd(function(c) + c:name("ingest in") + :description("Generate commands to ingest new files from digest stream") + c:option("--target") + :argname("dir") + :description("Target for copy commands; if omitted, ignore new files") + local relative = c:option("--relative") + :argname("path") + :description("Don't crush targets to basename; trim given prefix instead") + local mv = c:flag("--move") + :description("Issue move, not copy, commands for new files") + c:option("--prune-log") + :argname("file") + :description("Log of files with colliding digests; NUL-separated") + c:option("--digest-log") + :argname("file") + :description("Write novel digest records to the indicated file") + local inplace = c:flag("--inplace") + :description("Record new files in the digest log as they are") + c:flag("--verbose") + :description("Be chatty on stderr about the generated comand stream") + c:flag("--extended-escapes") + :description("Write control characters in file names with $'...' 
escapes") + argparse_flag_inul(c) + argparse_flag_nul(c) + c:mutex(inplace, mv) + c:mutex(inplace, relative) + argp_group("Ingest", c) + end, + function(args, dbh) + local eol = args.nul and "\0" or "\n" + + local function log_new_hash() end + if args.digest_log then + local logf = assert(io.open(args.digest_log, "wb")) + local rend = cdblib.renderer_for(false, false, mk_default_render_template()) + function log_new_hash(h, p) + return logf:write(rend(h, p)) + end + end + + local function log_prune() end + if args.prune_log then + local prunef = assert(io.open(args.prune_log, "wb")) + function log_prune(p) + return prunef:write(p, "\0") + end + end + + + local mkiter = iter_gnu_digest_stderr(cdblib.iter_lines_or_nul(args.inul)) + local sth_hash_find = assert(sql_mk_hashid_find(dbh)) + local sth_path_by_hash = assert(sql_mk_pathid_find_by_hash(dbh)) + local sth_superseder_by_hash = assert(sql_mk_superseder_find_by_hash(dbh)) + + local shell_escape = + args.extended_escapes and cdblib.extended_shell_escape + or cdblib.posix_shell_escape + local human_escape = cdblib.human_shell_escape + + local path_crush = + args.relative and function(p) plpath.relpath(p, args.relative) end + or plpath.basename + + function explain_found_hash(h, p) + local res = sql_run_one_x(sth_path_by_hash, h) + if res then + return table.concat({ "Import hash ", h, " from path ", p, + " already in database at ", human_escape(res[1])}) + end + + local res = sql_run_one_x(sth_superseder_by_hash, h) + if res then + return table.concat({"Import hash ", h, " from path ", p, + " already in database but superseded"}) + end + + return nil + end + + for h, p in mkiter() do + local res = sql_run_one_x(sth_hash_find, h) + if res then + local hep = human_escape(p) + local exp = explain_found_hash(h, hep) + if exp then + if args.verbose then io.stderr:write(exp, "\n") end + log_prune(p) + else + -- Leave inexplicable things alone + if args.verbose then + io.stderr:write("Import hash ", h, " from path ", hep, 
+ " in database without explanation! Leaving in place.\n") + end + end + elseif args.target then + local q = plpath.join(args.target, path_crush(p)) + if args.verbose then + io.stderr:write("Import ", human_escape(p), + " to ", human_escape(q), "\n") + end + io.write(args.move and "mv" or "cp", + " ", shell_escape(p), " ", shell_escape(q), eol) + log_new_hash(h, q) + elseif args.inplace then + io.stderr:write("Adding in place ", human_escape(p), "\n") + log_new_hash(h, p) + elseif args.verbose then + io.stderr:write("Not importing new ", human_escape(p), "\n") + end + end + end) + +----------------------------------------------------------------- }}} +----------------------------------------------------------------- }}} +----------------------------------------- Administrative commands {{{ +---------------------------------------- Command: initialize init {{{ mksubcmd(function(c) - c:name("init") + c:name("initialize init") :description("Initialize the database") - argp_group("Administrative Commands", c) + argp_group("Administrative", c) end, function(args, dbh) local function ddo(sql) assert(dbi.Do(dbh, sql)) end + ddo([[PRAGMA auto_vacuum="incremental";]]) ddo([[CREATE TABLE IF NOT EXISTS paths ( pathid INTEGER PRIMARY KEY ASC, path TEXT NOT NULL UNIQUE ON CONFLICT FAIL)]]) - ddo([[CREATE TABLE IF NOT EXISTS hashes ( + ddo([[CREATE TABLE IF NOT EXISTS hashes ( hashid INTEGER PRIMARY KEY ASC, hash TEXT NOT NULL UNIQUE ON CONFLICT FAIL)]]) ddo([[CREATE TABLE IF NOT EXISTS hash_hash_superseders ( supersederid INTEGER PRIMARY KEY ASC, oldid INTEGER REFERENCES hashes(hashid), newid INTEGER REFERENCES hashes(hashid), + timestamp INTEGER DEFAULT CURRENT_TIMESTAMP, note TEXT, UNIQUE(oldid, newid) ON CONFLICT FAIL)]]) ddo([[CREATE INDEX IF NOT EXISTS hash_hash_superseders_idx_old ON hash_hash_superseders (oldid)]]) + -- TODO: can we make path_hash a WITHOUT ROWID table? 
ddo([[CREATE TABLE IF NOT EXISTS path_hash ( pairid INTEGER PRIMARY KEY ASC, pathid INTEGER REFERENCES paths(pathid), @@ -144,32 +375,38 @@ mksubcmd(function(c) ON path_hash (pathid)]]) ddo([[CREATE INDEX IF NOT EXISTS path_hash_idx_hashid ON path_hash (hashid)]]) - ddo([[CREATE VIEW IF NOT EXISTS v_path_hash AS SELECT + ddo([[CREATE VIEW IF NOT EXISTS v_path_hash AS SELECT pairid, pathid, hashid, path, hash, timestamp FROM path_hash NATURAL JOIN paths NATURAL JOIN hashes]]) dbh:commit() end) ----------------------------------------------------------------- }}} ---------------------------------------------------- Command: stat {{{ +--------------------------------------------- Command: stats stat {{{ mksubcmd(function(c) - c:name("stat") + c:name("stats stat") :description("Report statistics") - argp_group("Administrative Commands", c) + argp_group("Administrative", c) end, function(args, dbh) local nhash, npath, nobsv nhash = assert(sql_do(dbh, "SELECT COUNT(*) FROM hashes" )):fetch()[1] npath = assert(sql_do(dbh, "SELECT COUNT(*) FROM paths" )):fetch()[1] nobsv = assert(sql_do(dbh, "SELECT COUNT(*) FROM path_hash" )):fetch()[1] - print(("nhash=%d npath=%d nobsv=%d"):format(nhash, npath, nobsv)) + nsupr = assert(sql_do(dbh, "SELECT COUNT(*) FROM hash_hash_superseders")) + :fetch()[1] + print(("nhash=%d npath=%d nobsv=%d nsuper=%d") + :format(nhash, npath, nobsv, nsupr)) end) +----------------------------------------------------------------- }}} +------------------------------------------------ Command: dbgc gc {{{ + mksubcmd(function(c) - c:name("gc") + c:name("dbgc gc") :description("Generate SQL to prune identifiers not used by observations") - argp_group("Administrative Commands", c) + argp_group("Administrative", c) end, function(args, dbh) local sth_paths_dead = assert(sql_do(dbh, @@ -190,77 +427,188 @@ mksubcmd(function(c) print(("DELETE FROM hashes WHERE hashid = %d;"):format(h[1])) end end) +----------------------------------------------------------------- 
}}} +-------------------------------------------------- Command: dbopt {{{ + +mksubcmd(function(c) + c:name("dbopt") + :description("ANALYZE and VACUUM the database") + argp_group("Administrative", c) + end, + function(args, dbh) + local function ddo(sql) assert(dbi.Do(dbh, sql)) end + ddo("ANALYZE") + dbh:commit() + dbh:autocommit(true) -- that is, do not implicitly BEGIN a transaction + ddo("VACUUM") + end) + +----------------------------------------------------------------- }}} +---------------------------------------------- Command: dumpsuper {{{ + +mksubcmd(function(c) + c:name("dumpsuper") + :description("Dump information about superseder pairs") + local v = c:flag("--verbose") + :description("Be slightly more informative, render for human consumption") + local n = argparse_flag_nul(c) + c:mutex(v, n) + argp_group("Administrative", c) + end, + function(args, dbh) + + local print_row + if args.verbose then + local sth_path_find_by_hash = assert(sql_mk_pathid_find_by_hash(dbh)) + local function print_paths(hash) + sth_path_find_by_hash:execute(hash) + for prow in sth_path_find_by_hash:rows() do + io.write(" ", prow[1], "\n") + end + end + function print_row(srow) + print("Entry:", srow.note) + print(" old:" , srow.oldhash) + print_paths(srow.oldhash) + print(" new:" , srow.newhash) + print_paths(srow.newhash) + print() + end + elseif args.nul then + function print_row(srow) + io.write(srow.oldhash, " ", srow.newhash, " ", srow.note, "\0") + end + else + function print_row(srow) + io.write(srow.oldhash, " ", srow.newhash, " ", srow.note, "\n") + end + end + + local sth = assert(dbh:prepare( + [[SELECT note, o.hash AS oldhash, n.hash AS newhash + FROM hash_hash_superseders + JOIN hashes AS n ON n.hashid == newid + JOIN hashes AS o ON o.hashid == oldid]])) + sth:execute() + for srow in sth:rows(true) do print_row(srow) end + end) + +----------------------------------------------------------------- }}} +--------------------------------------------- Command: 
checksuper {{{ + +mksubcmd(function(c) + c:name("checksuper") + :description("Perform sanity checks on superseders") + argp_group("Administrative", c) + end, + function(args, dbh) + local sth_path_find_by_hash = assert(sql_mk_pathid_find_by_hash(dbh)) + local sth_superseder_by_hash = + assert(sql_mk_superseder_find_hash_by_hash(dbh)) + + local sth = assert(dbh:prepare( + [[SELECT note, o.hash AS oldhash, n.hash AS newhash + FROM hash_hash_superseders + JOIN hashes AS n ON n.hashid == newid + JOIN hashes AS o ON o.hashid == oldid]])) + sth:execute() + for srow in sth:rows(true) do + -- Ensure that each new-side superseder is either itself superseded or + -- has a path in the database + local res = sql_run_one_x(sth_path_find_by_hash, srow.newhash) + if res == nil then + local res = sql_run_one_x(sth_superseder_by_hash, srow.newhash) + if res == nil then + print("Superseder record without replacement:") + print(" note:", srow.note) + print(" old:" , srow.oldhash) + print(" new:" , srow.newhash) + print() + end + end + end + end) ----------------------------------------------------------------- }}} ---------------------------------------------------- Command: look {{{ +----------------------------------------------------------------- }}} +-------------------------------------------------- Query commands {{{ +-------------------------------------------- Command: lookup look {{{ mksubcmd(function(c) - c:name("look") + c:name("lookup look") :description("Look up checksums for path glob(s)") - c:argument("glob", "Path globs to search"):args("+") + c:argument("glob") + :args("*") + :description("Path globs to search (none to stream from stdin)") + c:flag("--timestamps") + :description("Prefix lines by recorded observation timestamp") argparse_for_render(c) - argparse_no_hashes(c) - argp_group("Queries", c) + argp_group("Query", c) end, function(args, dbh) local sql = assert(dbh:prepare( - [[SELECT hash, path FROM v_path_hash WHERE path GLOB ?]])) - local render_both, 
render_path = renderers_for(args) - local renderer = args.no_hashes - and function(h, p) return render_path(p) end - or render_both - for _, glob in ipairs(args.glob) do + [[SELECT timestamp, hash, path FROM v_path_hash WHERE path GLOB ?]])) + local rend_dig = renderer_for(args) + local rend_row = + args.timestamps + and function(row) + return row.timestamp, " ", rend_dig(row.hash, row.path) + end + or function(row) return rend_dig(row.hash, row.path) end + local iter = + #args.glob ~= 0 and cdblib.iter_table(args.glob) or cdblib.iter_lines() + for glob in iter() do sql:execute(glob) - for row in sql:rows() do - io.write(renderer(table.unpack(row))) + for row in sql:rows(true) do + io.write(rend_row(row)) end end end) ----------------------------------------------------------------- }}} ---------------------------------------------------- Command: mapp {{{ +------------------------------------------- Command: mappath mapp {{{ mksubcmd(function(c) - c:name("mapp") - :description("Map paths to hashes in the database, like look") - c:flag("--no-paths") - :description("Print only the resulting hashes") - :default(false) + c:name("mappath mapp") + :description("Map paths (on stdin) to hashes in the database; look w/o glob") argparse_for_render(c) argparse_flag_inul(c) - argp_group("Queries", c) + argp_group("Query", c) end, function(args, dbh) local sth = assert(dbh:prepare( [[SELECT hash FROM v_path_hash WHERE path = ?]])) - local render = args.no_paths - and function(h, p) return h, args.nul and '\0' or '\n' end - or renderers_for(args) - + local render = renderer_for(args) local mkiter = cdblib.iter_lines_or_nul(args.inul) for p in mkiter() do sth:execute(p) for row in sth:rows() do io.write(render(row[1], p)) end + -- TODO: What if we didn't find anything? 
end end) ----------------------------------------------------------------- }}} ---------------------------------------------------- Command: maph {{{ +------------------------------------------- Command: maphash maph {{{ mksubcmd(function(c) - c:name("maph") + c:name("maphash maph") :description("Map hashes to paths in the database") + local hash = c:argument("hash") + :args("*") + :description("Hashes to look up") argparse_for_render(c) - argparse_flag_inul(c) - argp_group("Queries", c) + local inul = argparse_flag_inul(c) + c:mutex(hash, inul) + argp_group("Query", c) end, function(args, dbh) - local sth = assert(dbh:prepare( - [[SELECT path FROM v_path_hash WHERE hash = ?]])) - local render = renderers_for(args) - local mkiter = cdblib.iter_lines_or_nul(args.inul) + local sth = assert(sql_mk_pathid_find_by_hash(dbh)) + local render = renderer_for(args) + local mkiter = + #args.hash ~= 0 and cdblib.iter_table(args.hash) + or cdblib.iter_lines_or_nul(args.inul) for h in mkiter() do sth:execute(h) for row in sth:rows() do @@ -270,12 +618,12 @@ mksubcmd(function(c) end) ----------------------------------------------------------------- }}} ---------------------------------------------------- Command: cflx {{{ +----------------------------------------- Command: conflicts cflx {{{ mksubcmd(function(c) - c:name("cflx") + c:name("conflicts cflx") :description("Find conflicting measurements of paths") - argp_group("Queries", c) + argp_group("Query", c) end, function(args, dbh) local sth = assert(sql_do(dbh, @@ -291,120 +639,75 @@ mksubcmd(function(c) print("PATH", (cdblib.escape_gnu_digest(p))) end print((" observed hash %s with id %d at %s"):format(h, pairid, ts)) + -- TODO: that's probably not the right thing to print end end) ----------------------------------------------------------------- }}} ---------------------------------------------------- Command: addh {{{ - -mksubcmd(function(c) - c:name("addh") - :description("Ingest digest tool output") - 
c:option("--graft"):default("") - :description("Graft a prefix to input file names") - c:flag("--replace-paths") - :description("Remove all existing observations of reported paths") - argparse_flag_inul(c) - argp_group("Updates", c) - end, - function(args, dbh) - local sth_path_upsert = assert(sql_mk_path_upsert(dbh)) - local sth_hash_upsert = assert(sql_mk_hash_upsert(dbh)) - local sth_obsv_del = assert(dbh:prepare( - [[DELETE FROM path_hash WHERE pathid = ?]])) - local sth_obsv_upsert = assert(dbh:prepare( - [[INSERT OR REPLACE INTO path_hash (pathid, hashid) VALUES (?, ?)]])) - - local mkiter = cdblib.iter_lines_or_nul(args.inul) - - local nadded = 0 - - for h, p in cdblib.iter_gnu_digest(mkiter)() do - p = plpath.normpath(plpath.join(args.graft, p)) - local pid = sql_run_one(sth_path_upsert, p)[1] - if args.replace_path then sth_obsv_del:execute(pid) end - local hid = sql_run_one(sth_hash_upsert, h)[1] - sth_obsv_upsert:execute(pid, hid) - if sth_obsv_upsert:affected() > 0 then dbh:commit() end - - nadded = nadded + 1 - io.write(("Processed %d hashes\r"):format(nadded)); io.flush() - end - io.write("\n") - end) - ------------------------------------------------------------------ }}} ---------------------------------------------------- Command: filh {{{ +---------------------------------------- Command: filterhash filh {{{ mksubcmd(function(c) - c:name("filh") + c:name("filterhash filh") :description("Filter digest tool lines against database by hash") argparse_for_db_filter(c) - argp_group("Queries", c) + argp_group("Query", c) end, function(args, dbh) local rex = (args.predicate == "in") - local rend_both, rend_path = renderers_for(args) - local renderer = args.no_hashes - and function(h, p) return rend_path(p) end - or rend_both - local sth = sql_mk_hash_find(dbh) - for h, p in cdblib.iter_gnu_digest(cdblib.mk_lines_iter())() do - local res, err = sql_run_one(sth, h) - if res == false and err ~= nil then error(err) end + local renderer = renderer_for(args) + 
local sth = assert(sql_mk_hashid_find(dbh)) + for h, p in iter_gnu_digest_stderr(cdblib.iter_lines())() do + local res = sql_run_one_x(sth, h) if (res ~= nil) == rex then io.write(renderer(h, p)) end end end) ----------------------------------------------------------------- }}} ---------------------------------------------------- Command: filp {{{ +---------------------------------------- Command: filterpath filp {{{ mksubcmd(function(c) - c:name("filp") + c:name("filterpath filp") :description("Filter digest tool lines against database by path") argparse_for_db_filter(c) - c:flag("--just-paths") - :description("Input is a series of paths without digests") + c:option("--in-paths") + :description("Input is a series of bare paths; optional arg is dummy digest") + :args(1) + :default("-"):defmode("a") argparse_flag_inul(c) - argp_group("Queries", c) + argp_group("Query", c) end, function(args, dbh) local rex = (args.predicate == "in") - local rend_both, rend_path = renderers_for(args) - local renderer = args.no_hashes - and function(h, p) return rend_path(p) end - or rend_both + local renderer = renderer_for(args) local mkiter = cdblib.iter_lines_or_nul(args.inul) - local mkiter = args.just_paths - and cdblib.iter_just_paths_as_digest(mkiter) - or cdblib.iter_gnu_digest(mkiter) + local mkiter = args.in_paths + and cdblib.iter_just_paths_as_digest(args.in_paths, mkiter) + or iter_gnu_digest_stderr(mkiter) - local sth = sql_mk_path_find(dbh) + local sth = sql_mk_pathid_find(dbh) for h, p in mkiter() do - local res, err = sql_run_one(sth, p) - if res == false and err ~= nil then error(err) end + local res = sql_run_one_x(sth, p) if (res ~= nil) == rex then io.write(renderer(h, p)) end end end) ----------------------------------------------------------------- }}} ---------------------------------------------------- Command: verh {{{ +--------------------------------- Command: verifyhash verify verh {{{ mksubcmd(function(c) - c:name("verh") + c:name("verifyhash verify 
verh") :description("Verify reported digests against database") - c:option("--graft") - :description("Graft a prefix to input file names") - :default("") + argparse_opt_graft(c) c:flag("--also-mismatch") :description("Also report other hashes associated with a path") - argp_group("Queries", c) + argparse_flag_inul(c) + argp_group("Query", c) end, function(args, dbh) - local sth_path_find = assert(sql_mk_path_find(dbh)) - local sth_hash_find = assert(sql_mk_hash_find(dbh)) - local sth_path_find_by_hash = assert(sql_mk_path_find_by_hash(dbh)) + local sth_path_find = assert(sql_mk_pathid_find(dbh)) + local sth_hash_find = assert(sql_mk_hashid_find(dbh)) + local sth_path_find_by_hash = assert(sql_mk_pathid_find_by_hash(dbh)) local sth_obsv_find_by_pathid_hash = assert(dbh:prepare([[SELECT pairid FROM path_hash NATURAL JOIN hashes WHERE pathid = ? AND hash = ?]])) @@ -413,9 +716,9 @@ mksubcmd(function(c) FROM path_hash NATURAL JOIN hashes WHERE pathid = ? AND hash != ?]])) local fail = 0 - for h, p in cdblib.iter_gnu_digest(cdblib.mk_lines_iter())() do + for h, p in iter_gnu_digest_stderr(cdblib.iter_lines_or_nul(args.inul))() do p = plpath.normpath(plpath.join(args.graft, p)) - local pid = sql_run_one(sth_path_find, p) + local pid = sql_run_one_x(sth_path_find, p) if pid == nil then -- Path not in database print(("Path '%s' is not in database"):format(p)) @@ -436,12 +739,12 @@ mksubcmd(function(c) -- Path in database pid = pid[1] local didfail = false - local obsvid = sql_run_one(sth_obsv_find_by_pathid_hash, pid, h) + local obsvid = sql_run_one_x(sth_obsv_find_by_pathid_hash, pid, h) if obsvid == nil then -- Observation not in database print(("Path '%s' not associated with that hash in database"):format(p)) didfail = true - else print("OK: ", p) -- XXX + else print("OK:", p) -- TODO end if args.also_mismatch then sth_obsv_find_by_pathid_nothash:execute(pid, h) @@ -466,132 +769,359 @@ mksubcmd(function(c) c:name("diff") :description("Compare against another database") 
c:argument("db2") - c:option("--flavor"):default("both") + c:option("--flavor"):default("all") :description("Database aspects to compare") - -- TODO :choices("hash", "path", "both") + -- TODO :choices("hash", "path", "both", "supers", "all") c:option("--which"):default("symm") :description("Direction of comparison") -- TODO :choices("sub", "super", "symm") c:flag("--no-headers") :description("Suppress headers in output") argparse_for_render(c) - argp_group("Queries", c) + argp_group("Query", c) end, function(args, dbh) - local rend_hash, rend_path = renderers_for(args) + local renderer = renderer_for(args) local header = args.no_headers and function() end or print local function header(x) if not args.no_headers then print(x) end end dbi.Do(dbh, "ATTACH DATABASE ? AS other", args.db2) - - if pltablex.find({"path", "both"}, args.flavor) then + + if pltablex.find({"path", "both", "all"}, args.flavor) then if pltablex.find({"sub", "symm"}, args.which) then header("-- Paths in local database not in remote:") - for row in sql_do(dbh, + for row in assert(sql_do(dbh, [[SELECT path FROM paths WHERE path NOT IN (SELECT path FROM other.paths) - ORDER BY path]]):rows() do - io.write(rend_path(row[1])) - end + ORDER BY path]])):rows() do + io.write(renderer("", row[1])) + end end if pltablex.find({"super", "symm"}, args.which) then header("-- Paths in remote database not in local:") - for row in sql_do(dbh, + for row in assert(sql_do(dbh, [[SELECT path FROM other.paths WHERE path NOT IN (SELECT path FROM paths) - ORDER BY path]]):rows() do - io.write(rend_path(row[1])) - end + ORDER BY path]])):rows() do + io.write(renderer("", row[1])) + end end end - if pltablex.find({"hash", "both"}, args.flavor) then + if pltablex.find({"hash", "both", "all"}, args.flavor) then if pltablex.find({"sub", "symm"}, args.which) then header("-- Hashes in local database not in remote:") - for row in sql_do(dbh, + for row in assert(sql_do(dbh, [[SELECT hash, path FROM hashes NATURAL JOIN path_hash 
NATURAL JOIN paths WHERE hash NOT IN (SELECT hash FROM other.hashes) - ORDER BY path]]):rows() do - io.write(rend_hash(table.unpack(row))) - end + ORDER BY path]])):rows() do + io.write(renderer(table.unpack(row))) + end end if pltablex.find({"super", "symm"}, args.which) then header("-- Hashes in remote database not in local:") - for row in sql_do(dbh, + for row in assert(sql_do(dbh, [[SELECT hash, path FROM other.hashes NATURAL JOIN other.path_hash NATURAL JOIN other.paths WHERE hash NOT IN (SELECT hash FROM hashes) - ORDER BY path]]):rows() do - io.write(rend_hash(table.unpack(row))) - end + ORDER BY path]])):rows() do + io.write(renderer(table.unpack(row))) + end + end + end + + if pltablex.find({"supers", "all"}, args.flavor) then + if pltablex.find({"sub", "symm"}, args.which) then + header("-- Superseders in local database not in remote:") + for row in assert(sql_do(dbh, + [[SELECT old.hash, new.hash, timestamp, note + FROM hash_hash_superseders AS s + JOIN hashes AS old ON s.oldid == old.hashid + JOIN hashes AS new ON s.newid == new.hashid + WHERE (old.hash, new.hash) NOT IN ( + SELECT oold.hash, onew.hash + FROM other.hash_hash_superseders AS os + JOIN other.hashes AS oold ON os.oldid == oold.hashid + JOIN other.hashes AS onew ON os.newid == onew.hashid + ) + ]])):rows() do + row[3] = table.concat({'(', row[3], ')'}) + io.write(table.concat(row, " "), "\n") + end + end + if pltablex.find({"super", "symm"}, args.which) then + header("-- Superseders in remote database not in local:") + for row in assert(sql_do(dbh, + [[SELECT oold.hash, onew.hash, timestamp, note + FROM other.hash_hash_superseders AS os + JOIN other.hashes AS oold ON os.oldid == oold.hashid + JOIN other.hashes AS onew ON os.newid == onew.hashid + WHERE (oold.hash, onew.hash) NOT IN ( + SELECT old.hash, new.hash + FROM hash_hash_superseders AS s + JOIN hashes AS old ON s.oldid == old.hashid + JOIN hashes AS new ON s.newid == new.hashid + ) + ]])):rows() do + row[3] = table.concat({'(', 
row[3], ')'}) + io.write(table.concat(row, " "), "\n") + end end end + header("-- End of diff report") end) +----------------------------------------------------------------- }}} +----------------------------------------------------------------- }}} +------------------------------------------------- Update commands {{{ +------------------------------------------- Command: addhash addh {{{ + +mksubcmd(function(c) + c:name("addhash addh") + :description("Ingest digest tool output") + argparse_opt_graft(c) + argparse_flag_inul(c) + argparse_flag_progress(c) + c:flag("--replace-paths") + :description("Remove all existing observations of reported paths") + c:flag("--keep-timestamps") + :description("Do not update the observation timestamp fields") + argp_group("Update", c) + end, + function(args, dbh) + local sth_path_upsert = assert(sql_mk_path_upsert(dbh)) + local sth_hash_upsert = assert(sql_mk_hash_upsert(dbh)) + local sth_obsv_del = assert(dbh:prepare( + [[DELETE FROM path_hash WHERE pathid = ?]])) + local sth_obsv_upsert = assert(dbh:prepare( + [[INSERT OR IGNORE INTO path_hash (pathid, hashid) VALUES (?, ?)]])) + local sth_obsv_upd_ts = not args.keep_timestamps and assert(dbh:prepare( + [[UPDATE path_hash SET timestamp = CURRENT_TIMESTAMP + WHERE pathid = ? 
AND hashid = ?]])) + + local mkiter = cdblib.iter_lines_or_nul(args.inul) + + local progeach, progfin = mk_progress_pair(args.progress) + + for h, p in iter_gnu_digest_stderr(mkiter)() do + local docommit = false + p = plpath.normpath(plpath.join(args.graft, p)) + local pid = sql_run_one_x(sth_path_upsert, p)[1] + assert (pid ~= nil) + if args.replace_paths then + sth_obsv_del:execute(pid) + docommit = sth_obsv_del:affected() > 0 + end + local hid = sql_run_one_x(sth_hash_upsert, h)[1] + assert (hid ~= nil) + sth_obsv_upsert:execute(pid, hid) + docommit = docommit or sth_obsv_upsert:affected() > 0 + if sth_obsv_upd_ts then + sth_obsv_upd_ts:execute(pid, hid) + assert(sth_obsv_upd_ts:affected() == 1) + docommit = true + end + if docommit then dbh:commit() end + + progeach() + end + progfin() + end) + ----------------------------------------------------------------- }}} --------------------------------------------------- Command: domv {{{ mksubcmd(function(c) c:name("domv") :description("Remove given paths if hashes exist elsewhere") + local path = c:argument("path") + :args("*") + :description("Paths to move (none to stream from stdin)") c:flag("--dry-run -n") :description("Do not perform deletions") - :default(false) - argparse_flag_inul(c) - argp_group("Updates", c) + c:flag("--verbose") + :description("Be chatty") + local inul = argparse_flag_inul(c) + c:mutex(path, inul) + argp_group("Update", c) end, function(args, dbh) local qsth = assert(dbh:prepare( [[SELECT path FROM v_path_hash WHERE hash IN (SELECT hash FROM v_path_hash WHERE path = ?1) AND path != ?1]])) - local dsth = assert(dbh:prepare( + + local ssth = assert(dbh:prepare( + [[SELECT hash_hash_superseders.timestamp, note + FROM hash_hash_superseders + JOIN path_hash ON path_hash.hashid == hash_hash_superseders.oldid + JOIN paths ON path_hash.pathid == paths.pathid + WHERE path = ?]])) + + local dhsth = assert(dbh:prepare( [[DELETE FROM path_hash WHERE pathid IN (SELECT pathid FROM paths WHERE path = 
?)]])) - local mkiter = cdblib.iter_lines_or_nul(args.inul) - for p in mkiter() do - assert(qsth:execute(p)) + local dsth = assert(dbh:prepare([[DELETE FROM paths WHERE path = ?]])) - print("Trying mv:", p) + local mkiter = #args.path ~= 0 + and cdblib.iter_table(args.path) + or cdblib.iter_lines_or_nul(args.inul) + + for p in mkiter() do + if args.verbose then print("Trying mv:", p) end local ok = false + + assert(qsth:execute(p)) for row in qsth:rows() do - print("Found", row[1]) + if args.verbose then print("Found path", row[1]) end ok = true end + if not ok then + assert(ssth:execute(p)) + for row in ssth:rows() do + if args.verbose then print("Found super", row[2], "at", row[1]) end + ok = true + end + end + if ok and not args.dry_run then + assert(dhsth:execute(p)) assert(dsth:execute(p)) dbh:commit() - print("OK", dsth:affected()) + if args.verbose then print("OK", dsth:affected()) end end end end) ----------------------------------------------------------------------------- }}} ---------------------------------------------------- Command Grouping, Part 2 {{{ +----------------------------------------------------------------- }}} +------------------------------------- Command: addsuperhash addsh {{{ + +local function iter_supers(baseiter) + return function() return coroutine.wrap(function() + for line in baseiter() do + if line == nil then return nil end + local oh, nh, xtra = line:match("^(%x*)%s+(%x*)(.*)$") + if oh == nil then + print("Bad line (missing hashes?):", line) -- XXX + else + local txt = xtra:match("^%s*(.*)$") + if xtra ~= nil and txt == nil then + print("Bad line (malformed suffix):", line) -- XXX + else + coroutine.yield(oh, nh, txt or "") + end + end + end + end) end +end + +mksubcmd(function(c) + c:name("addsuperhash addsh") + :description("Ingest superseder assertions (\"hash hash note\")") + argparse_flag_progress(c) + argparse_flag_inul(c) + argp_group("Update", c) + end, + function(args, dbh) + local sth_hash_upsert = 
assert(sql_mk_hash_upsert(dbh)) + local sth_super_upsert = assert(dbh:prepare( + [[INSERT OR REPLACE INTO hash_hash_superseders (oldid, newid, note) + VALUES (?, ?, ?)]])) + + local mkiter = cdblib.iter_lines_or_nul(args.inul) + local progeach, progfin = mk_progress_pair(args.progress) + + for oh, nh, txt in iter_supers(mkiter)() do + local ohid = sql_run_one_x(sth_hash_upsert, oh)[1] + assert (ohid ~= nil) + local nhid = sql_run_one_x(sth_hash_upsert, nh)[1] + assert (nhid ~= nil) + sth_super_upsert:execute(ohid, nhid, txt) + if sth_super_upsert:affected() > 0 then dbh:commit() end + + progeach() + end + progfin() + end) + +----------------------------------------------------------------- }}} +------------------------------------------ Command: addsuper adds {{{ + +mksubcmd(function(c) + c:name("addsuper adds") + :description("Indicate that one path is the superseder of another") + c:argument("oldpath") + :description("Path to superseded file") + c:argument("newpath") + :description("Path to superseder file") + c:argument("note") + :args("*") + :description("Note for superseder entry; multiple args concat by space") + argp_group("Update", c) + end, + function(args, dbh) + local sth_hash_by_path = assert(sql_mk_hashid_find_by_path(dbh)) + + local oldhid = sql_run_one_x(sth_hash_by_path, args.oldpath) + if oldhid == nil then + print("No hash associated with old path") + return + elseif sth_hash_by_path:fetch() then + print("Two hashes associated with old path; refusing") + return + end + oldhid = oldhid[1] + + local newhid = sql_run_one_x(sth_hash_by_path, args.newpath) + if newhid == nil then + print("No hash associated with new path") + return + elseif sth_hash_by_path:fetch() then + print("Two hashes associated with new path; refusing") + return + end + newhid = newhid[1] + + if oldhid == newhid then + print("Equal hashes for paths; refusing") + return + end + + sql_do(dbh, [[INSERT OR REPLACE INTO + hash_hash_superseders (oldid, newid, note) VALUES (?,?,?)]], + 
oldhid, newhid, table.concat(args.note, " ")) + dbh:commit() + end) + +----------------------------------------------------------------- }}} +----------------------------------------------------------------- }}} +--------------------------- Argparse and Command Grouping, Part 2 {{{ -for _, g in ipairs{"Queries", "Updates", "Administrative Commmands"} do - argp:group(g, table.unpack(argp_groups[g] or {})) +for _, g in ipairs{"Ingest", "Query", "Update", "Administrative"} do + argp:group(g .. " commands", table.unpack(argp_groups[g] or {})) argp_groups[g] = nil end -- Any stragglers? for k,v in pairs(argp_groups) do argp:group(k, table.unpack(v)) end ----------------------------------------------------------------------------- }}} --------------------------------------------------------- Top-level executive {{{ +----------------------------------------------------------------- }}} +--------------------------------------------- Top-level executive {{{ local args = argp:parse() -if not args.database then error "--database is required" end +-- io.stderr:write((require "pl.pretty").write(args), "\n") + +if not args.database then argp:error("Database is required") end local dbh, err = dbi.Connect("SQLite3", args.database) if not dbh then error ("Database error: " .. err) end -args:command_fn(dbh) +args:_command_fn(dbh) ----------------------------------------------------------------------------- }}} +----------------------------------------------------------------- }}} diff --git a/cdb-digestrelative b/cdb-digestrelative index 925c259..f9cfe34 100755 --- a/cdb-digestrelative +++ b/cdb-digestrelative @@ -1,4 +1,5 @@ -#!/usr/bin/env lua5.3 +#!/usr/bin/env luajit +-- Also runs under lua5.3 -- Read a stream of GNU digest filenames and concat them together, adjusting -- paths by prefixing the relative path of each digest file. 
diff --git a/cdblib.lua b/cdblib.lua index a2bb2c5..69d7acc 100644 --- a/cdblib.lua +++ b/cdblib.lua @@ -2,6 +2,8 @@ local plstringx = require "pl.stringx" local _M = {} +------------------------------------------------ GNU digest tools {{{ + -- Escape file name for GNU digest; returns new form and number, which is 0 if -- string is unaltered and positive if escaping was necessary. -- @@ -24,13 +26,16 @@ local function unescape_gnu_digest(fn) end _M.unescape_gnu_digest = unescape_gnu_digest -function _M.iter_gnu_digest(baseiter) +-- Iterate a GNU digest tool stream, canonicalizing file names into their +-- unescaped form if necessary. `errcb` is invoked for lines that do not match +-- and may return `false` to stop iteration. +function _M.iter_gnu_digest(errcb, baseiter) return function() return coroutine.wrap(function() for line in baseiter() do if line == nil then return nil end local esc, h, fn = line:match("^(\\?)(%x*) [ *](.*)$") if esc == nil then - print("Bad line:", line) -- XXX + if errcb(line) == false then return nil end else coroutine.yield(h, (esc == "") and fn or unescape_gnu_digest(fn)) end @@ -38,18 +43,21 @@ function _M.iter_gnu_digest(baseiter) end) end end -function _M.iter_just_paths_as_digest(baseiter) +function _M.iter_just_paths_as_digest(dummyhash, baseiter) return function() return coroutine.wrap(function() for line in baseiter() do if line == nil then return nil end - coroutine.yield("-", line) + coroutine.yield(dummyhash, line) end end) end end +----------------------------------------------------------------- }}} +---------------------------------------------- Iterator utilities {{{ + -- a custom delimited string iterator, useful for nul-separated records, e.g. 
-- :: (string, () -> () -!> string) -> () -> () -!> string -function _M.mk_delim_iter(delim, baseiter) +function _M.iter_delim(delim, baseiter) local ix = 0 local s = { fin = {}, incomplete = {} } @@ -80,11 +88,21 @@ function _M.mk_delim_iter(delim, baseiter) return function() return coroutine.wrap(function() for chunk in baseiter() do proc(chunk) - + -- while we have a complete delimited string, return one - while #s.fin > 0 do - ix = ix + 1 - coroutine.yield(ix, table.remove(s.fin)) + if #s.fin > 0 then + local t = s.fin + s.fin = {} + + -- reverse once, then drain from the "front" + do + local i, n = 1, #t + while i < n do t[i], t[n] = t[n], t[i]; i = i+1; n = n-1 end + end + while #t > 0 do + ix = ix + 1 + coroutine.yield(ix, table.remove(t)) + end end end end) end @@ -97,39 +115,100 @@ function _M.iter_just_2nd(baseiter) end -- :: (file or nil) -> () -> () -!> string -function _M.mk_read_iter(f) +function _M.iter_read(f) f = f or io.input() return function() return function() return f:read(1024) end end end -function _M.mk_lines_iter(f) +function _M.iter_lines(f) return function() return (f or io.input()):lines() end end -- Iterate stdin as either newline-terminated or NUL-terminated records --- :: (boolean, file or nil) -> () -!> string +-- :: (boolean, file or nil) -> () -> () -!> string function _M.iter_lines_or_nul(nul, f) assert(type(nul) == "boolean") - return nul and _M.iter_just_2nd(_M.mk_delim_iter("\0", _M.mk_read_iter(f))) - or _M.mk_lines_iter(f) + return nul and _M.iter_just_2nd(_M.iter_delim("\0", _M.iter_read(f))) + or _M.iter_lines(f) end -function _M.renderers_for(nul, unescape) - assert(type(nul) == "boolean") - assert(type(unescape) == "boolean") - local fin = nul and '\0' or '\n' - local mangle_path = unescape - and function(p) return p, fin end - or function(p) - local np, nesc = escape_gnu_digest(p) - return (nesc == 0 and "" or "\\"), " ", np, fin - end - local mangle_full = unescape - and function(h, f) return "", h, " ", f, fin end - 
or function(h, f) - local nf, nesc = escape_gnu_digest(f) - return (nesc == 0 and "" or "\\"), h, " ", nf, fin - end - return mangle_full, mangle_path +function _M.iter_table(t) + return function() return coroutine.wrap(function() + for _, v in ipairs(t) do coroutine.yield(v) end + end) end end +----------------------------------------------------------------- }}} +--------------------------------------------- Generator utilities {{{ + +-- lazily generate and cache escaped version +local function _renderer_for_esc(t,k) + local nesc + t.f, nesc = escape_gnu_digest(t.u) + t.e = nesc == 0 and "" or "\\" + return t[k] + end + +-- Generate a renderer for a choice of common parameters. In the resulting +-- template expansion, +-- +-- $e expands to "\\" (resp. "") if the path was (resp. was not) escaped +-- $f expands to the optionally escaped file name (see $e) +-- $h expands to the hash +-- $u expands to the unescaped file name +-- $z expands to the appropriate record separator ("\n" or "\0") +-- +function _M.renderer_for(nul, unescape, template) + local v = { z = nul and "\0" or "\n" + , f = unescape and function(t) return t.u end or _renderer_for_esc + , e = unescape and "" or _renderer_for_esc + } + local mt = { __index = + function(t,k) + local x = v[k] + return type(x) == "function" and x(t,k) or x + end + } + return function(hash, path) + return template:substitute(setmetatable({h = hash, u = path}, mt)) + end +end + +----------------------------------------------------------------- }}} +------------------------------------------- Path escape utilities {{{ + +function _M.posix_shell_escape(str) + return "'" .. str:gsub("'", "'\"'\"'") .. "'" +end + +-- While POSIX shells understand control characters inside single quotes, they +-- are unfriendly to read as such. Some shells have a $'...' escape that can +-- process things like \t and \xXX. This uses that instead. 
Perhaps we should +-- have a version that actually uses \t, but, honestly, if you're hitting this +-- case you deserve what you get. +local function extended_shell_escape(str) + return "'" .. + str:gsub("['%c]", function(c) + return c == "'" and "'\"'\"'" or ("'$'\\x%02x''"):format(c:byte()) + end) .. "'" +end +_M.extended_shell_escape = extended_shell_escape + +function _M.human_shell_escape(str) + if not str:find("[%c]") then + -- no control characters, and... + if not str:find("'") then + -- no single quotes, so simple enough to just single-quote the thing + return "'" .. str .. "'" + elseif not str:find('["$`\\]') then + -- single quote but no double quote, dollar, backtick, or backslash + return '"' .. str .. '"' + end + end + + -- If none of the special cases apply, just do the full thing + return extended_shell_escape(str) +end + +----------------------------------------------------------------- }}} + return _M diff --git a/test.sh b/test.sh new file mode 100755 index 0000000..07e3edd --- /dev/null +++ b/test.sh @@ -0,0 +1,350 @@ +#!/bin/zsh + +: ${LUA:=luajit} ${TMPDIR:=/tmp/cdbtest} +DB1=${TMPDIR}/test-db1 +DB2=${TMPDIR}/test-db2 +LOG1=${TMPDIR}/test-log1 +LOG2=${TMPDIR}/test-log2 +LOG3=${TMPDIR}/test-log3 +LOG4=${TMPDIR}/test-log4 + +set -e -u +mkdir -p ${TMPDIR} +rm -f ${DB1} ${LOG1} ${LOG2} ${LOG3} ${LOG4} + +set -x + +# Test 'init' and that we can invoke from a different directory +pushd tmp +${LUA} ../cdb --db ${DB1} init +popd + +# Seed test database with some data +cat >${LOG1} <&1 1>/dev/null) <${LOG1} <${LOG2} 2>${LOG3} +# Import commands on stdout +diff -u - ${LOG2} <${LOG1} 2>${LOG2} \ + <<<'5 new'$'\n''4 copy' +# No output stdout +diff -u /dev/null ${LOG1} +# One prunelog entry +echo -n 'copy\0' | cmp - ${LOG3} +# Log on stderr +diff -u - ${LOG2} <&1 <<<'9 rude'$'\r''copy') \ + <<<"Import hash 9 from path 'rude'$'\\x0d''copy' already in database at 'twinned'" + +# Test 'filterpath' +cat >${LOG1} <${LOG1} <${LOG1} <${LOG1} 2>${LOG2} <