--- /dev/null
+What is this?
+#############
+
+This is a resurrection of an ancient project: a "mostly read-only file
+management tool". It's intended for keeping a large list of checksums in a
+database so that duplication, movement, and corruption of files can be
+detected. In addition to maintaining a single database, it also offers
+cross-database functionality.
+
+We speak of "observations" to mean an association of a file path and its
+contents (or, at least, their cryptographic checksum). Most operations on the
+checksum database pertain to one or more observations.
+
+Theory of Operation
+###################
+
+This program is just a shim around a database; it does not interact with the
+filesystem much itself. Instead, it should be used in composition with things
+like ``find`` and the GNU coreutils digest programs (e.g. ``sha512sum``),
+delegating details of filesystem traversal and choice of hash and so on to the
+user.
+
+Supported Operations
+####################
+
+Initialize A Database
+=====================
+
+::
+
+ cdb --db ${DB} init
+
+Observe A Path
+==============
+
+Add the checksum of a single path to the database. This will create a new
+checksum and/or a new path identifier as needed and will bind them together. ::
+
+ sha512sum $FILE | cdb --db ${DB} addh
+
+Or, for all files under a path::
+
+ find $DIR -type f -exec sha512sum {} \+ | cdb --db ${DB} addh
+
+If we have a pile of digest files already, each of which contains digests of
+paths relative to its location, we can generate a database, ``${DB2}``, from
+them with the assistance of the ``cdb-digestrelative`` tool::
+
+ find ${DIR} -type f -name SHA512SUMS -print0 | cdb-digestrelative --inul | cdb --db ${DB2} addh
+
+Revalidate A Path Observation
+=============================
+
+Measure the checksum of a path and confirm that the database already held that
+observation. Reports unexpected files as well as mis-checksummed contents. ::
+
+ sha512sum $FILE | cdb --db ${DB} verh
+
+Or for all files under a path::
+
+ find $DIR -type f -exec sha512sum {} \; | cdb --db ${DB} verh
+
+This processing of digest streams is to be preferred to verifying a digest
+stream as generated by the database, e.g.::
+
+ cdb --db ${DB} look \* | sha512sum -c
+
+because the former can be more informative in the case of mismatching digests
+(specifically, the database can look for other paths that have the reported
+digest). If it's easier to have the database generate the set of files, that
+can be done::
+
+ cdb --db ${DB} look \* --no-hashes --unescape --nul | xargs -0 sha512sum | cdb --db ${DB} verh
+
+Add Checksums For Missing Files
+===============================
+
+We can quickly construct a "just paths" database, which associates all paths
+with a single digest, from the current state of the file system as follows::
+
+ cdb --db ${JPDB} init
+ find ${DIR} -type f -printf "0 %p\\0" | cdb --db ${JPDB} addh --inul
+
+This database may not seem very useful, but when combined with ``cdb diff`` we
+can quickly find all paths whose checksums are unknown to the database::
+
+ cdb --db ${DB} diff ${JPDB} --flavor=path --which=super
+
+We can then script computing those files' checksums and adding the new reports
+to the database::
+
+ cdb --db ${DB} diff ${JPDB} --flavor=path --which=super --no-headers --nul --unescape > ${JPDB}.new-files0
+ xargs -0 sha512sum > ${JPDB}.new < ${JPDB}.new-files0
+ cdb --db ${DB} addh < ${JPDB}.new
+
+From Another Database
+---------------------
+
+If we have another database that knows digests for our files, rather than
+computing digests again, we can extract checksums from ``${DB2}`` and install
+them into ``${DB}``::
+
+ cdb --db ${DB2} mapp --inul < ${JPDB}.new-files0 | cdb --db ${DB} addh
+
+Responding to File Moves
+========================
+
+Armed with a "just paths" database as per the above, we can then direct the
+database to prune tracked paths not in the "just paths" database if the hashes
+are observed elsewhere::
+
+ cdb --db ${DB} diff ${JPDB} --flavor=path --which=sub --no-headers --nul --unescape > ${JPDB}.missing-files0
+ cdb --db ${DB} domv --inul < ${JPDB}.missing-files0
+ cdb --db ${DB} gc > ${DB}.gc
+ sqlite3 ${DB} < ${DB}.gc
+
+.. TODO or if the observed digest is now superseded?
+
+Find Duplicates
+===============
+
+Given a path prefix (possibly empty), report all logged observations below that
+path of contents that exist in multiple locations (i.e., files whose checksums
+are identical).
+
+.. TODO
+
+Remove Path
+===========
+
+Cease to consider a particular path part of the database and remove all
+observations made of it. Since this application is primarily for data hoarders
+who tend not to delete things, one should prefer to :ref:`Respond to File Moves
+<Responding to File Moves>` rather than risk removing the last observation of a
+given hash.
+
+.. TODO
+
+Add Superseder
+==============
+
+Indicate that some file contents are to be considered a lesser version of some
+other contents.
+
+.. TODO
+
+Report Novelty
+==============
+
+Given a path, measure its checksum and report if it does not match, and has not
+been superseded by, any observation already recorded in the database.
+
+.. TODO
+
+.. This command would be useful for ingesting things into a library or pruning
+ collections of files outside the library.
+
+--------------------------------------------------------------------------------
+
+Example Uses
+############
+
+A photo library
+===============
+
+Suppose ``/mnt/photos`` contains a collection of photos. We might want to...
+
+* measure all the files in that directory, flagging new and updated contents::
+
+ $ cksdb /mnt/photos/.cksdb observe /mnt/photos
+
+* measure all the files in that directory, automatically updating the database::
+
+ $ cksdb /mnt/photos/.cksdb observe --new --changed /mnt/photos
+
+* report duplicates anywhere in the library::
+
+ $ cksdb /mnt/photos/.cksdb ls --duplicate
+
+* report files in a particular directory that also exist anywhere else in the
+ library::
+
+ $ cksdb /mnt/photos/.cksdb ls --duplicate /mnt/photos/dir1
+
+* restrict the search for duplication to another directory::
+
+ $ cksdb /mnt/photos/.cksdb ls --duplicate /mnt/photos/dir1 \
+ --also /mnt/photos/dir2
+
+* explicitly acknowledge a deletion by removing observations of it::
+
+ $ cksdb /mnt/photos/.cksdb rm /mnt/photos/filename
+
+* indicate that the last observed content of ``foo.jpg`` is superseded by the
+ last observed content of ``foo.raw``::
+
+ $ cksdb /mnt/photos/.cksdb supersede /mnt/photos/foo.jpg /mnt/photos/foo.raw
+
+* import files from outside the library, say, in ``/mnt/sdcard``, skipping
+ duplicate and superseded files and removing all examined files (that is,
+ imported, duplicate, and superseded; ``--harvest``)::
+
+ $ cksdb /mnt/photos/.cksdb import --harvest /mnt/photos/newdir /mnt/sdcard
+
+* import from another database::
+
+ $ cksdb /mnt/photos/.cksdb import-db /mnt/oldphotos/.cksdb
+
+Cross-Database Operations
+=========================
+
+Compute violations of set-theoretic relationships between a database and the
+union of one or more other databases::
+
+ $ cksdb /mnt/photos/.cksdb is-subset /mnt/backups/photos/.cksdb
+ $ cksdb /mnt/photos/.cksdb is-superset /mnt/backups/photos/.cksdb
--- /dev/null
+#!/usr/bin/env lua5.3
+
+--------------------------------------------------------- Imports {{{
+
+local argparse = require "argparse"
+local dbi = require "DBI"
+local plpath = require "pl.path"
+local plstringx = require "pl.stringx"
+local pltablex = require "pl.tablex"
+
+local cdblib = require "cdblib"
+
+----------------------------------------------------------------- }}}
+--------------------------------------------------- SQL utilities {{{
+
+-- Prepare `sql` on `dbh` and execute it once with the given bind values.
+-- Returns the executed statement handle, or `false` plus the error value
+-- reported by the failing step, so the result can be wrapped in assert().
+local function sql_do(dbh, sql, ...)
+  local stmt, prep_err = dbh:prepare(sql)
+  if stmt then
+    local executed, exec_err = stmt:execute(...)
+    if executed then return stmt end
+    return false, exec_err
+  end
+  return false, prep_err
+end
+
+-- Execute the already-prepared statement `sth` with the given bind values and
+-- return whatever its first fetch() yields. When execution fails, returns
+-- `false` and the error reported by execute() instead.
+local function sql_run_one(sth, ...)
+  local executed, exec_err = sth:execute(...)
+  if executed then return sth:fetch() end
+  return false, exec_err
+end
+
+----------------------------------------------------------------- }}}
+-------------------------------------------------- SQL statements {{{
+
+-- Prepare the statement that inserts a path (one bind value) or, on conflict,
+-- rewrites the path to itself; either way it RETURNs the row's pathid.
+-- Returns whatever dbh:prepare returns.
+local function sql_mk_path_upsert(dbh)
+  local upsert_sql = [[INSERT INTO paths (path) VALUES (?)
+ ON CONFLICT DO UPDATE SET path = path RETURNING pathid]]
+  return dbh:prepare(upsert_sql)
+end
+
+-- Prepare the lookup of a pathid by exact path (one bind value).
+-- Returns whatever dbh:prepare returns.
+local function sql_mk_path_find(dbh)
+  local find_sql = [[SELECT pathid FROM paths WHERE path = ?]]
+  return dbh:prepare(find_sql)
+end
+
+-- Prepare the statement that inserts a hash (one bind value) or, on conflict,
+-- rewrites the hash to itself; either way it RETURNs the row's hashid.
+-- Returns whatever dbh:prepare returns.
+local function sql_mk_hash_upsert(dbh)
+  local upsert_sql = [[INSERT INTO hashes (hash) VALUES (?)
+ ON CONFLICT DO UPDATE SET hash = hash RETURNING hashid]]
+  return dbh:prepare(upsert_sql)
+end
+
+-- Prepare the lookup of a hashid by exact hash text (one bind value).
+-- Returns whatever dbh:prepare returns.
+local function sql_mk_hash_find(dbh)
+  local find_sql = [[SELECT hashid FROM hashes WHERE hash = ?]]
+  return dbh:prepare(find_sql)
+end
+
+-- Prepare the query listing every path that has an observation with the
+-- given hash text (one bind value). Returns whatever dbh:prepare returns.
+local function sql_mk_path_find_by_hash(dbh)
+  local find_sql = [[SELECT path
+ FROM path_hash NATURAL JOIN paths NATURAL JOIN hashes WHERE hash = ?]]
+  return dbh:prepare(find_sql)
+end
+
+----------------------------------------------------------------- }}}
+------------------------------------------------ Argparse, part 1 {{{
+
+-- Register on command `c` the two output-rendering flags shared by several
+-- subcommands: `--unescape` and `--nul`/`-0`, both defaulting to false.
+local function argparse_for_render(c)
+  local render_flags = {
+    { "--unescape", "Do not escape the filenames; ambiguous without --nul" },
+    { "--nul -0", "NUL-terminate lines rather than newline" },
+  }
+  for _, spec in ipairs(render_flags) do
+    c:flag(spec[1]):description(spec[2]):default(false)
+  end
+end
+
+-- Pick the output renderers matching the parsed `--nul` and `--unescape`
+-- flags; forwards both values to cdblib.renderers_for and returns its results.
+local function renderers_for(args)
+  local use_nul, raw_names = args.nul, args.unescape
+  return cdblib.renderers_for(use_nul, raw_names)
+end
+
+-- Register the `--inul`/`-1` flag (default false) on command `c` and return
+-- the result of the final builder call.
+local function argparse_flag_inul(c)
+  local inul = c:flag("--inul -1")
+  return inul:description("Input is NUL-delimited, not newline"):default(false)
+end
+
+-- Register the `--no-hashes` flag on command `c`. No default is set, so the
+-- parsed value is only tested for truthiness by the commands.
+local function argparse_no_hashes(c)
+  local help = "Elide hashes in output; no leading space with --unescape"
+  c:flag("--no-hashes"):description(help)
+end
+
+-- Register the options common to the database filter commands on `c`:
+-- `--predicate` (default "in"), `--no-hashes`, and the rendering flags.
+local function argparse_for_db_filter(c)
+  local predicate = c:option("--predicate")
+  predicate:default("in")
+  -- TODO: :choices({"in", "out"})
+  argparse_no_hashes(c)
+  argparse_for_render(c)
+end
+
+-- Top-level command-line parser; subcommands attach themselves via mksubcmd.
+local argp = argparse("cdb", "checksum database tool")
+
+-- global options
+do
+  local db_option = argp:option("--database --db")
+  db_option:args(1):description("Indicate primary checksum database")
+end
+
+-- Subcommands collected per help-group name, in registration order.
+local argp_groups = {}
+
+-- Append `cmd` to the list kept for group `gname`, creating the list on first
+-- use.
+local function argp_group(gname, cmd)
+  local members = argp_groups[gname]
+  if not members then
+    members = {}
+    argp_groups[gname] = members
+  end
+  members[#members + 1] = cmd
+end
+
+-- Create a subcommand on the global parser. `cmdinit(cmd)` configures it
+-- (name, options, ...); when the command is selected, its action records the
+-- command name in `args.command` and `body` in `args.command_fn`.
+-- Returns the new command object.
+local function mksubcmd(cmdinit, body)
+  local subcmd = argp:command()
+  cmdinit(subcmd)
+
+  local function on_selected(parsed, cmdname)
+    parsed.command = cmdname
+    parsed.command_fn = body
+  end
+  subcmd:action(on_selected)
+
+  return subcmd
+end
+
+----------------------------------------------------------------- }}}
+--------------------------------------------------- Command: init {{{
+
+mksubcmd(function(c)
+    c:name("init"):description("Initialize the database")
+    argp_group("Administrative Commands", c)
+  end,
+  -- Create every table, index and view the tool uses (each guarded by
+  -- IF NOT EXISTS), raising an error at the first statement that fails, then
+  -- commit once at the end.
+  function(args, dbh)
+    local schema = {
+      [[CREATE TABLE IF NOT EXISTS paths (
+ pathid INTEGER PRIMARY KEY ASC,
+ path TEXT NOT NULL UNIQUE ON CONFLICT FAIL)]],
+      [[CREATE TABLE IF NOT EXISTS hashes (
+ hashid INTEGER PRIMARY KEY ASC,
+ hash TEXT NOT NULL UNIQUE ON CONFLICT FAIL)]],
+      [[CREATE TABLE IF NOT EXISTS hash_hash_superseders (
+ supersederid INTEGER PRIMARY KEY ASC,
+ oldid INTEGER REFERENCES hashes(hashid),
+ newid INTEGER REFERENCES hashes(hashid),
+ note TEXT,
+ UNIQUE(oldid, newid) ON CONFLICT FAIL)]],
+      [[CREATE INDEX IF NOT EXISTS hash_hash_superseders_idx_old
+ ON hash_hash_superseders (oldid)]],
+      [[CREATE TABLE IF NOT EXISTS path_hash (
+ pairid INTEGER PRIMARY KEY ASC,
+ pathid INTEGER REFERENCES paths(pathid),
+ hashid INTEGER REFERENCES hashes(hashid),
+ timestamp INTEGER DEFAULT CURRENT_TIMESTAMP,
+ UNIQUE(pathid, hashid) ON CONFLICT FAIL)]],
+      [[CREATE INDEX IF NOT EXISTS path_hash_idx_pathid
+ ON path_hash (pathid)]],
+      [[CREATE INDEX IF NOT EXISTS path_hash_idx_hashid
+ ON path_hash (hashid)]],
+      [[CREATE VIEW IF NOT EXISTS v_path_hash AS SELECT
+ pairid, pathid, hashid, path, hash, timestamp
+ FROM path_hash NATURAL JOIN paths NATURAL JOIN hashes]],
+    }
+    -- Statements run strictly in list order: tables before their indexes,
+    -- and the view last.
+    for _, ddl in ipairs(schema) do
+      assert(dbi.Do(dbh, ddl))
+    end
+    dbh:commit()
+  end)
+
+----------------------------------------------------------------- }}}
+--------------------------------------------------- Command: stat {{{
+
+mksubcmd(function(c)
+    c:name("stat"):description("Report statistics")
+    argp_group("Administrative Commands", c)
+  end,
+  -- Print a single line with the row counts of the hashes, paths and
+  -- path_hash tables.
+  function(args, dbh)
+    local function count_rows(query)
+      local sth = assert(sql_do(dbh, query))
+      return sth:fetch()[1]
+    end
+    local nhash = count_rows("SELECT COUNT(*) FROM hashes")
+    local npath = count_rows("SELECT COUNT(*) FROM paths")
+    local nobsv = count_rows("SELECT COUNT(*) FROM path_hash")
+    print(string.format("nhash=%d npath=%d nobsv=%d", nhash, npath, nobsv))
+  end)
+
+mksubcmd(function(c)
+    c:name("gc")
+      :description("Generate SQL to prune identifiers not used by observations")
+    argp_group("Administrative Commands", c)
+  end,
+  -- Print (but do not run) DELETE statements for every path that has no
+  -- observation, and for every hash that has no observation and takes no part
+  -- in a superseder relation. Each DELETE is preceded by an SQL comment naming
+  -- the victim.
+  function(args, dbh)
+    -- The output is meant to be fed back into an SQL shell, so free text put
+    -- in a `--` comment must stay on one line: a raw newline inside a stored
+    -- path would otherwise end the comment and let the rest of the name be
+    -- read as SQL. The GNU-digest escaping turns \r, \n and \ into visible
+    -- escapes; the extra parentheses drop gsub's substitution count.
+    local function one_line(text)
+      return (cdblib.escape_gnu_digest(text))
+    end
+
+    local sth_paths_dead = assert(sql_do(dbh,
+      [[SELECT pathid, path FROM paths
+ WHERE pathid NOT IN (SELECT pathid FROM path_hash)]]))
+    for p in sth_paths_dead:rows() do
+      print("-- DEAD PATH", one_line(p[2]))
+      print(("DELETE FROM paths WHERE pathid = %d;"):format(p[1]))
+    end
+
+    local sth_hashes_dead = assert(sql_do(dbh,
+      [[SELECT hashid, hash FROM hashes
+ WHERE hashid NOT IN (SELECT hashid FROM path_hash)
+ AND hashid NOT IN (SELECT oldid FROM hash_hash_superseders)
+ AND hashid NOT IN (SELECT newid FROM hash_hash_superseders)]]))
+    for h in sth_hashes_dead:rows() do
+      print("-- DEAD HASH", one_line(h[2]))
+      print(("DELETE FROM hashes WHERE hashid = %d;"):format(h[1]))
+    end
+  end)
+
+----------------------------------------------------------------- }}}
+--------------------------------------------------- Command: look {{{
+
+mksubcmd(function(c)
+    c:name("look")
+      :description("Look up checksums for path glob(s)")
+    c:argument("glob", "Path globs to search"):args("+")
+    argparse_for_render(c)
+    argparse_no_hashes(c)
+    argp_group("Queries", c)
+  end,
+  -- For each glob on the command line, write every stored observation whose
+  -- path matches it (SQL GLOB), rendered according to --nul/--unescape and
+  -- without the hash when --no-hashes is given.
+  function(args, dbh)
+    local sth = assert(dbh:prepare(
+      [[SELECT hash, path FROM v_path_hash WHERE path GLOB ?]]))
+    local render_both, render_path = renderers_for(args)
+    local renderer = args.no_hashes
+      and function(h, p) return render_path(p) end
+      or render_both
+    for _, glob in ipairs(args.glob) do
+      -- Stop with the driver's error instead of iterating a statement whose
+      -- execution failed.
+      assert(sth:execute(glob))
+      for row in sth:rows() do
+        io.write(renderer(table.unpack(row)))
+      end
+    end
+  end)
+
+----------------------------------------------------------------- }}}
+--------------------------------------------------- Command: mapp {{{
+
+mksubcmd(function(c)
+    c:name("mapp")
+      :description("Map paths to hashes in the database, like look")
+    c:flag("--no-paths")
+      :description("Print only the resulting hashes")
+      :default(false)
+    argparse_for_render(c)
+    argparse_flag_inul(c)
+    argp_group("Queries", c)
+  end,
+  -- Read paths from standard input (newline- or, with --inul, NUL-delimited)
+  -- and write every hash recorded for each exact path. With --no-paths only
+  -- the hash and a record terminator are written.
+  function(args, dbh)
+    local sth = assert(dbh:prepare(
+      [[SELECT hash FROM v_path_hash WHERE path = ?]]))
+    -- renderers_for returns two renderers; inside the and/or expression only
+    -- the first (hash plus path) is kept.
+    local render = args.no_paths
+      and function(h, p) return h, args.nul and '\0' or '\n' end
+      or renderers_for(args)
+
+    local mkiter = cdblib.iter_lines_or_nul(args.inul)
+    for p in mkiter() do
+      -- Stop with the driver's error instead of iterating a statement whose
+      -- execution failed.
+      assert(sth:execute(p))
+      for row in sth:rows() do
+        io.write(render(row[1], p))
+      end
+    end
+  end)
+
+----------------------------------------------------------------- }}}
+--------------------------------------------------- Command: maph {{{
+
+mksubcmd(function(c)
+    c:name("maph")
+      :description("Map hashes to paths in the database")
+    argparse_for_render(c)
+    argparse_flag_inul(c)
+    argp_group("Queries", c)
+  end,
+  -- Read hashes from standard input (newline- or, with --inul, NUL-delimited)
+  -- and write, for each one, every path observed with exactly that hash.
+  function(args, dbh)
+    local sth = assert(dbh:prepare(
+      [[SELECT path FROM v_path_hash WHERE hash = ?]]))
+    -- Only the first renderer (hash plus path) is needed here.
+    local render = renderers_for(args)
+    local mkiter = cdblib.iter_lines_or_nul(args.inul)
+    for h in mkiter() do
+      -- Stop with the driver's error instead of iterating a statement whose
+      -- execution failed.
+      assert(sth:execute(h))
+      for row in sth:rows() do
+        io.write(render(h, row[1]))
+      end
+    end
+  end)
+
+----------------------------------------------------------------- }}}
+--------------------------------------------------- Command: cflx {{{
+
+mksubcmd(function(c)
+    c:name("cflx")
+      :description("Find conflicting measurements of paths")
+    argp_group("Queries", c)
+  end,
+  -- List every path that has more than one observation, printing a "PATH"
+  -- header (with the name escaped) followed by one line per observed hash.
+  function(args, dbh)
+    -- ORDER BY keeps all observations of one path adjacent, which the
+    -- header logic below depends on.
+    local sth = assert(sql_do(dbh,
+      [[SELECT path, hash, pairid, timestamp FROM v_path_hash NATURAL JOIN
+ (SELECT pathid, pcount FROM
+ (SELECT pathid, COUNT(pairid) AS pcount FROM path_hash GROUP BY pathid)
+ WHERE pcount > 1)
+ ORDER BY path, pairid]]))
+    -- Path of the previous row; kept local so nothing leaks into globals.
+    local lastpath = nil
+    for row in sth:rows() do
+      local p, h, pairid, ts = table.unpack(row)
+      if p ~= lastpath then
+        lastpath = p
+        -- Parentheses drop the substitution count returned by the escaper.
+        print("PATH", (cdblib.escape_gnu_digest(p)))
+      end
+      print((" observed hash %s with id %d at %s"):format(h, pairid, ts))
+    end
+  end)
+
+----------------------------------------------------------------- }}}
+--------------------------------------------------- Command: addh {{{
+
+mksubcmd(function(c)
+    c:name("addh")
+      :description("Ingest digest tool output")
+    c:option("--graft"):default("")
+      :description("Graft a prefix to input file names")
+    c:flag("--replace-paths")
+      :description("Remove all existing observations of reported paths")
+    argparse_flag_inul(c)
+    argp_group("Updates", c)
+  end,
+  -- Read digest-tool records from standard input and record each
+  -- (path, hash) pair as an observation, creating path and hash rows as
+  -- needed. With --replace-paths, existing observations of a reported path
+  -- are deleted first. Any database failure raises an error. A running
+  -- "Processed N hashes" counter is written to standard output.
+  function(args, dbh)
+    local sth_path_upsert = assert(sql_mk_path_upsert(dbh))
+    local sth_hash_upsert = assert(sql_mk_hash_upsert(dbh))
+    local sth_obsv_del = assert(dbh:prepare(
+      [[DELETE FROM path_hash WHERE pathid = ?]]))
+    local sth_obsv_upsert = assert(dbh:prepare(
+      [[INSERT OR REPLACE INTO path_hash (pathid, hashid) VALUES (?, ?)]]))
+
+    local mkiter = cdblib.iter_lines_or_nul(args.inul)
+
+    local nadded = 0
+
+    for h, p in cdblib.iter_gnu_digest(mkiter)() do
+      p = plpath.normpath(plpath.join(args.graft, p))
+      -- sql_run_one yields (false, err) on failure; assert turns that into
+      -- an error instead of an "attempt to index" crash.
+      local pid = assert(sql_run_one(sth_path_upsert, p))[1]
+      -- The flag is --replace-paths, which the parser stores as
+      -- args.replace_paths (dashes become underscores).
+      if args.replace_paths then assert(sth_obsv_del:execute(pid)) end
+      local hid = assert(sql_run_one(sth_hash_upsert, h))[1]
+      assert(sth_obsv_upsert:execute(pid, hid))
+      if sth_obsv_upsert:affected() > 0 then dbh:commit() end
+
+      nadded = nadded + 1
+      io.write(("Processed %d hashes\r"):format(nadded)); io.flush()
+    end
+    io.write("\n")
+  end)
+
+----------------------------------------------------------------- }}}
+--------------------------------------------------- Command: filh {{{
+
+mksubcmd(function(c)
+    c:name("filh")
+      :description("Filter digest tool lines against database by hash")
+    argparse_for_db_filter(c)
+    argp_group("Queries", c)
+  end,
+  -- Read digest-tool lines from standard input and re-emit those whose hash
+  -- is known to the database (--predicate in, the default) or, for any other
+  -- predicate value, those whose hash is not.
+  function(args, dbh)
+    local want_present = (args.predicate == "in")
+    local rend_both, rend_path = renderers_for(args)
+    local renderer = args.no_hashes
+      and function(h, p) return rend_path(p) end
+      or rend_both
+    -- Fail here with the driver's message if the statement cannot be
+    -- prepared, rather than later with an index-nil error.
+    local sth = assert(sql_mk_hash_find(dbh))
+    for h, p in cdblib.iter_gnu_digest(cdblib.mk_lines_iter())() do
+      local res, err = sql_run_one(sth, h)
+      if res == false and err ~= nil then error(err) end
+      if (res ~= nil) == want_present then io.write(renderer(h, p)) end
+    end
+  end)
+
+----------------------------------------------------------------- }}}
+--------------------------------------------------- Command: filp {{{
+
+mksubcmd(function(c)
+    c:name("filp")
+      :description("Filter digest tool lines against database by path")
+    argparse_for_db_filter(c)
+    c:flag("--just-paths")
+      :description("Input is a series of paths without digests")
+    argparse_flag_inul(c)
+    argp_group("Queries", c)
+  end,
+  -- Read records from standard input and re-emit those whose path is known
+  -- to the database (--predicate in, the default) or, for any other predicate
+  -- value, those whose path is not. Input is digest-tool output unless
+  -- --just-paths is given, in which case each record is a bare path.
+  function(args, dbh)
+    local want_present = (args.predicate == "in")
+    local rend_both, rend_path = renderers_for(args)
+    local renderer = args.no_hashes
+      and function(h, p) return rend_path(p) end
+      or rend_both
+
+    -- Raw record source first, then the parser layered on top of it.
+    local mkrecords = cdblib.iter_lines_or_nul(args.inul)
+    local mkiter = args.just_paths
+      and cdblib.iter_just_paths_as_digest(mkrecords)
+      or cdblib.iter_gnu_digest(mkrecords)
+
+    -- Fail here with the driver's message if the statement cannot be
+    -- prepared, rather than later with an index-nil error.
+    local sth = assert(sql_mk_path_find(dbh))
+    for h, p in mkiter() do
+      local res, err = sql_run_one(sth, p)
+      if res == false and err ~= nil then error(err) end
+      if (res ~= nil) == want_present then io.write(renderer(h, p)) end
+    end
+  end)
+
+----------------------------------------------------------------- }}}
+--------------------------------------------------- Command: verh {{{
+
+mksubcmd(function(c)
+    c:name("verh")
+      :description("Verify reported digests against database")
+    c:option("--graft")
+      :description("Graft a prefix to input file names")
+      :default("")
+    c:flag("--also-mismatch")
+      :description("Also report other hashes associated with a path")
+    argp_group("Queries", c)
+  end,
+  -- Read digest-tool lines from standard input and check each (path, hash)
+  -- pair against the database. Reports paths that are unknown (and where
+  -- else their hash is recorded, if anywhere), paths recorded with a
+  -- different hash, and - with --also-mismatch - additional hashes recorded
+  -- for a path. Prints a total when at least one record failed. Database
+  -- errors raise instead of being mistaken for a lookup result.
+  function(args, dbh)
+    local sth_path_find = assert(sql_mk_path_find(dbh))
+    local sth_path_find_by_hash = assert(sql_mk_path_find_by_hash(dbh))
+    local sth_obsv_find_by_pathid_hash =
+      assert(dbh:prepare([[SELECT pairid
+ FROM path_hash NATURAL JOIN hashes WHERE pathid = ? AND hash = ?]]))
+    local sth_obsv_find_by_pathid_nothash =
+      assert(dbh:prepare([[SELECT hash
+ FROM path_hash NATURAL JOIN hashes WHERE pathid = ? AND hash != ?]]))
+
+    -- sql_run_one signals failure as (false, err), while "no such row" is a
+    -- plain nil. Without this distinction a failed lookup would compare
+    -- unequal to nil and be reported as a match.
+    local function first_row(sth, ...)
+      local row, err = sql_run_one(sth, ...)
+      if row == false then error(err or "database query failed") end
+      return row
+    end
+
+    local fail = 0
+    for h, p in cdblib.iter_gnu_digest(cdblib.mk_lines_iter())() do
+      p = plpath.normpath(plpath.join(args.graft, p))
+      local pid = first_row(sth_path_find, p)
+      if pid == nil then
+        -- Path not in database
+        print(("Path '%s' is not in database"):format(p))
+
+        fail = fail + 1
+
+        local is_elsewhere = false
+        assert(sth_path_find_by_hash:execute(h))
+        for pp in sth_path_find_by_hash:rows() do
+          is_elsewhere = true
+          print(("... but matching hash at '%s'"):format(pp[1]))
+        end
+
+        if not is_elsewhere then
+          print(("... nor is its hash '%s'"):format(h))
+        end
+      else
+        -- Path in database
+        pid = pid[1]
+        local didfail = false
+        local obsvid = first_row(sth_obsv_find_by_pathid_hash, pid, h)
+        if obsvid == nil then
+          -- Observation not in database
+          print(("Path '%s' not associated with that hash in database"):format(p))
+          didfail = true
+        else print("OK: ", p) -- XXX
+        end
+        if args.also_mismatch then
+          assert(sth_obsv_find_by_pathid_nothash:execute(pid, h))
+          for row in sth_obsv_find_by_pathid_nothash:rows() do
+            -- Only print the lead-in when the expected hash was found.
+            if not didfail then
+              print(("Path '%s' at expected hash, but"):format(p))
+            end
+            didfail = true
+            print(("... additional hash '%s' in database"):format(row[1]))
+          end
+        end
+        if didfail then fail = fail + 1 end
+      end
+    end
+    if fail ~= 0 then print(("%d total errors"):format(fail)) end
+  end)
+
+----------------------------------------------------------------- }}}
+--------------------------------------------------- Command: diff {{{
+
+mksubcmd(function(c)
+    c:name("diff")
+      :description("Compare against another database")
+    c:argument("db2")
+    c:option("--flavor"):default("both")
+      :description("Database aspects to compare")
+    -- TODO :choices("hash", "path", "both")
+    c:option("--which"):default("symm")
+      :description("Direction of comparison")
+    -- TODO :choices("sub", "super", "symm")
+    c:flag("--no-headers")
+      :description("Suppress headers in output")
+    argparse_for_render(c)
+    argp_group("Queries", c)
+  end,
+  -- Attach the database file named by `db2` as "other" and report paths
+  -- and/or hashes (per --flavor) present on one side but not the other (per
+  -- --which). Section headers are printed unless --no-headers is given.
+  -- Raises an error if the attach or any query fails.
+  function(args, dbh)
+    local rend_both, rend_path = renderers_for(args)
+    local function header(x) if not args.no_headers then print(x) end end
+
+    -- Without a successful ATTACH every query below would fail, so stop here
+    -- with the driver's own message.
+    assert(dbi.Do(dbh, "ATTACH DATABASE ? AS other", args.db2))
+
+    -- Print `title`, then render every row of `sql` with `render`.
+    local function report(title, sql, render)
+      header(title)
+      for row in assert(sql_do(dbh, sql)):rows() do
+        io.write(render(table.unpack(row)))
+      end
+    end
+
+    local want_sub = pltablex.find({"sub", "symm"}, args.which)
+    local want_super = pltablex.find({"super", "symm"}, args.which)
+
+    if pltablex.find({"path", "both"}, args.flavor) then
+      if want_sub then
+        report("-- Paths in local database not in remote:",
+          [[SELECT path FROM paths
+ WHERE path NOT IN (SELECT path FROM other.paths)
+ ORDER BY path]], rend_path)
+      end
+      if want_super then
+        report("-- Paths in remote database not in local:",
+          [[SELECT path FROM other.paths
+ WHERE path NOT IN (SELECT path FROM paths)
+ ORDER BY path]], rend_path)
+      end
+    end
+
+    if pltablex.find({"hash", "both"}, args.flavor) then
+      if want_sub then
+        report("-- Hashes in local database not in remote:",
+          [[SELECT hash, path
+ FROM hashes NATURAL JOIN path_hash NATURAL JOIN paths
+ WHERE hash NOT IN (SELECT hash FROM other.hashes)
+ ORDER BY path]], rend_both)
+      end
+      if want_super then
+        report("-- Hashes in remote database not in local:",
+          [[SELECT hash, path
+ FROM other.hashes NATURAL JOIN other.path_hash NATURAL JOIN other.paths
+ WHERE hash NOT IN (SELECT hash FROM hashes)
+ ORDER BY path]], rend_both)
+      end
+    end
+    header("-- End of diff report")
+  end)
+
+----------------------------------------------------------------- }}}
+--------------------------------------------------- Command: domv {{{
+
+mksubcmd(function(c)
+    c:name("domv")
+      :description("Remove given paths if hashes exist elsewhere")
+    c:flag("--dry-run -n")
+      :description("Do not perform deletions")
+      :default(false)
+    argparse_flag_inul(c)
+    argp_group("Updates", c)
+  end,
+  -- For each path read from standard input, list the other paths that share
+  -- one of its hashes; if there is at least one and --dry-run is not set,
+  -- delete all observations of the given path and commit.
+  function(args, dbh)
+    local find_twins = assert(dbh:prepare(
+      [[SELECT path FROM v_path_hash
+ WHERE hash IN (SELECT hash FROM v_path_hash WHERE path = ?1)
+ AND path != ?1]]))
+    local drop_observations = assert(dbh:prepare(
+      [[DELETE FROM path_hash WHERE pathid IN
+ (SELECT pathid FROM paths WHERE path = ?)]]))
+
+    local records = cdblib.iter_lines_or_nul(args.inul)
+    for p in records() do
+      assert(find_twins:execute(p))
+
+      print("Trying mv:", p)
+
+      local seen_elsewhere = false
+      for twin in find_twins:rows() do
+        print("Found", twin[1])
+        seen_elsewhere = true
+      end
+
+      if seen_elsewhere then
+        if not args.dry_run then
+          assert(drop_observations:execute(p))
+          dbh:commit()
+          print("OK", drop_observations:affected())
+        end
+      end
+    end
+  end)
+
+---------------------------------------------------------------------------- }}}
+--------------------------------------------------- Command Grouping, Part 2 {{{
+
+-- Register the help groups in a fixed order. Each name must match exactly
+-- the string passed to argp_group() when the subcommands were defined;
+-- a mismatch yields an empty group here and leaves the real one to the
+-- stragglers loop below.
+for _, g in ipairs{"Queries", "Updates", "Administrative Commands"} do
+  argp:group(g, table.unpack(argp_groups[g] or {}))
+  argp_groups[g] = nil
+end
+
+-- Any stragglers?
+for k,v in pairs(argp_groups) do argp:group(k, table.unpack(v)) end
+
+---------------------------------------------------------------------------- }}}
+-------------------------------------------------------- Top-level executive {{{
+
+-- Parse the command line, open the SQLite database named by --database and
+-- hand control to the handler stored by the selected subcommand's action.
+local args = argp:parse()
+if not args.database then
+  error("--database is required")
+end
+
+local dbh, err = dbi.Connect("SQLite3", args.database)
+if not dbh then error("Database error: " .. err) end
+
+-- Same call as args:command_fn(dbh), written out explicitly.
+args.command_fn(args, dbh)
+
+---------------------------------------------------------------------------- }}}
--- /dev/null
+local plstringx = require "pl.stringx"
+
+local _M = {}
+
+-- Escape a file name the way the GNU digest tools do: backslash, carriage
+-- return and newline become "\\", "\r" and "\n" (as two-character escapes).
+-- Returns the escaped name and the number of substitutions made, so 0 means
+-- the name was left unaltered.
+--
+-- The GNU digest specification is incomplete: it does not promise that every
+-- name containing a backslash is escaped, although the tools appear to do so
+-- in practice. Of these two renderings the tools seem to always produce the
+-- first, while the documentation also seems to permit the second:
+-- "\012...ef as\\df" and "012...ef as\df". We follow the tools and always
+-- escape backslashes, even when the name contains no \r or \n.
+local function escape_gnu_digest(fn)
+  local replacements = { ['\\'] = '\\\\', ['\r'] = '\\r', ['\n'] = '\\n' }
+  local escaped, nsubs = fn:gsub("[\\\r\n]", replacements)
+  return escaped, nsubs
+end
+_M.escape_gnu_digest = escape_gnu_digest
+
+-- Undo escape_gnu_digest: every backslash-plus-character pair that is one of
+-- the three known escapes is replaced by the character it stands for; other
+-- pairs are left untouched. Returns the decoded name and the number of pairs
+-- matched. The decoding is unconditional, so only call it when the line is
+-- known to be escaped.
+local function unescape_gnu_digest(fn)
+  local decoded = { ['\\\\'] = '\\', ['\\r'] = '\r', ['\\n'] = '\n' }
+  local plain, npairs = fn:gsub("\\.", decoded)
+  return plain, npairs
+end
+_M.unescape_gnu_digest = unescape_gnu_digest
+
+-- Wrap a record-iterator factory so that each record is parsed as a line of
+-- GNU digest-tool output. The returned factory produces an iterator yielding
+-- (hash, filename) pairs; the filename is unescaped when the line carries the
+-- leading-backslash escape marker. Records that do not match the expected
+-- shape are reported on standard error and skipped, so they cannot be
+-- mistaken for data by whatever consumes standard output.
+function _M.iter_gnu_digest(baseiter)
+  return function() return coroutine.wrap(function()
+    for line in baseiter() do
+      local esc, h, fn = line:match("^(\\?)(%x*) [ *](.*)$")
+      if esc == nil then
+        io.stderr:write("Bad line:\t", line, "\n") -- XXX
+      else
+        -- The and/or expression keeps only the first result of the unescaper.
+        coroutine.yield(h, (esc == "") and fn or unescape_gnu_digest(fn))
+      end
+    end
+  end) end
+end
+
+-- Wrap a record-iterator factory so that each record is treated as a bare
+-- path: the resulting iterator yields ("-", record) pairs, i.e. the same
+-- shape as iter_gnu_digest with "-" standing in for the hash.
+function _M.iter_just_paths_as_digest(baseiter)
+  local function emit_pairs()
+    for path in baseiter() do
+      coroutine.yield("-", path)
+    end
+  end
+  return function() return coroutine.wrap(emit_pairs) end
+end
+
+-- A custom delimited-string iterator, useful for e.g. NUL-separated records.
+-- :: (string, () -> () -!> string) -> () -> () -!> (integer, string)
+--
+-- `baseiter` is a factory for an iterator over arbitrary chunks of the input;
+-- the resulting iterator yields (index, record) pairs, where records are the
+-- pieces of the concatenated chunks between occurrences of `delim`, in input
+-- order, and index counts from 1. Empty records are yielded like any other.
+-- Data after the last delimiter is yielded as a final record, so an
+-- unterminated last record is not lost.
+--
+-- The delimiter is matched literally (no pattern magic) and within a single
+-- chunk only, so a multi-byte delimiter split across two chunks is not
+-- recognised.
+function _M.mk_delim_iter(delim, baseiter)
+  return function() return coroutine.wrap(function()
+    local ix = 0
+    -- Fragments of the record currently being assembled across chunks.
+    local pending = {}
+
+    for chunk in baseiter() do
+      local from = 1
+      while true do
+        local dstart, dend = chunk:find(delim, from, true)
+        if not dstart then break end
+        pending[#pending + 1] = chunk:sub(from, dstart - 1)
+        ix = ix + 1
+        coroutine.yield(ix, table.concat(pending))
+        pending = {}
+        from = dend + 1
+      end
+      -- Whatever follows the last delimiter belongs to the next record.
+      if from <= #chunk then pending[#pending + 1] = chunk:sub(from) end
+    end
+
+    -- Input ended without a closing delimiter: flush what we have.
+    if #pending > 0 then
+      ix = ix + 1
+      coroutine.yield(ix, table.concat(pending))
+    end
+  end) end
+end
+
+-- Adapt a factory of two-valued iterators into a factory of iterators that
+-- yield only the second value of each step.
+function _M.iter_just_2nd(baseiter)
+  local function second_only()
+    for _, value in baseiter() do
+      coroutine.yield(value)
+    end
+  end
+  return function() return coroutine.wrap(second_only) end
+end
+
+-- :: (file or nil) -> () -> () -!> string
+-- Factory for an iterator that reads `f` in chunks of up to 1024 bytes.
+-- When `f` is nil the current default input file is used, as resolved when
+-- this function is called.
+function _M.mk_read_iter(f)
+  local src = f or io.input()
+  local function read_chunk()
+    return src:read(1024)
+  end
+  return function() return read_chunk end
+end
+-- Factory for a line iterator over `f`. When `f` is nil the default input
+-- file is looked up each time the factory is invoked.
+function _M.mk_lines_iter(f)
+  return function()
+    local src = f
+    if not src then src = io.input() end
+    return src:lines()
+  end
+end
+
+-- Build a record-iterator factory over `f` (or the default input when `f` is
+-- nil): NUL-delimited records when `nul` is true, lines otherwise.
+-- `nul` must be a boolean.
+-- :: (boolean, file or nil) -> () -> () -!> string
+function _M.iter_lines_or_nul(nul, f)
+  assert(type(nul) == "boolean")
+  if nul then
+    local chunks = _M.mk_read_iter(f)
+    return _M.iter_just_2nd(_M.mk_delim_iter("\0", chunks))
+  end
+  return _M.mk_lines_iter(f)
+end
+
+-- Build the two output renderers used by the commands. Both arguments must
+-- be booleans: `nul` selects NUL instead of newline as the record terminator,
+-- `unescape` selects raw file names instead of GNU-digest escaping.
+-- Returns (render_full, render_path): render_full(hash, name) and
+-- render_path(name) each return the list of string pieces to write for one
+-- record. In escaping mode the first piece is the "\" marker when the name
+-- needed escaping and "" otherwise.
+function _M.renderers_for(nul, unescape)
+  assert(type(nul) == "boolean")
+  assert(type(unescape) == "boolean")
+
+  local fin = '\n'
+  if nul then fin = '\0' end
+
+  local mangle_full, mangle_path
+  if unescape then
+    mangle_path = function(p) return p, fin end
+    mangle_full = function(h, f) return "", h, " ", f, fin end
+  else
+    mangle_path = function(p)
+      local np, nesc = escape_gnu_digest(p)
+      local marker = (nesc == 0) and "" or "\\"
+      return marker, " ", np, fin
+    end
+    mangle_full = function(h, f)
+      local nf, nesc = escape_gnu_digest(f)
+      local marker = (nesc == 0) and "" or "\\"
+      return marker, h, " ", nf, fin
+    end
+  end
+
+  return mangle_full, mangle_path
+end
+
+return _M