sbase

suckless unix tools
git clone git://git.suckless.org/sbase
Log | Files | Refs | README | LICENSE

commit e8fe04c543eab13e892beda05cb9bccb9e4f441e
parent 39a4c55378294437627421571a51b64bd5e09623
Author: remph <lhr@disroot.org>
Date:   Mon,  3 Mar 2025 19:52:37 +0100

du: Dedup hardlinks

Conform to POSIX, which says `Files with multiple links shall be counted
and written for only one entry,' in the 2008[1] and 2013[2] editions, and
uses more words to say the same thing in the 2017[3] and 2024[4] editions.
This patch also keeps inodes between operands and dedups symlinks if
applicable, which are implementation-defined in 2017 and required in 2024.
See also the `RATIONALE' section in the 2024 edition.

[1] https://pubs.opengroup.org/onlinepubs/9699919799.2008edition/utilities/du.html
[2] https://pubs.opengroup.org/onlinepubs/9699919799.2013edition/utilities/du.html
[3] https://pubs.opengroup.org/onlinepubs/9699919799/utilities/du.html
[4] https://pubs.opengroup.org/onlinepubs/9799919799/utilities/du.html

Diffstat:
Mdu.c | 53++++++++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 52 insertions(+), 1 deletion(-)

diff --git a/du.c b/du.c @@ -5,6 +5,7 @@ #include <errno.h> #include <fcntl.h> #include <limits.h> +#include <search.h> #include <stdint.h> #include <stdlib.h> #include <stdio.h> @@ -20,6 +21,11 @@ static int aflag = 0; static int sflag = 0; static int hflag = 0; +struct file { + dev_t devno; + ino_t inode; +}; + static void printpath(off_t n, const char *path) { @@ -35,16 +41,61 @@ nblks(blkcnt_t blocks) return (512 * blocks + blksize - 1) / blksize; } +static int +cmp(const void *p1, const void *p2) +{ + const struct file *f1 = p1, *f2 = p2; + + if (f1->devno > f2->devno) + return -1; + if (f1->devno < f2->devno) + return 1; + + /* f1->devno == f2->devno */ + if (f1->inode < f2->inode) + return -1; + if (f1->inode > f2->inode) + return 1; + + return 0; +} + +static int +duplicated(dev_t dev, ino_t ino) +{ + static void *tree; + struct file **fpp, *fp, file = {dev, ino}; + + if ((fpp = tsearch(&file, &tree, cmp)) == NULL) + eprintf("%s:", argv0); + + if (*fpp != &file) + return 1; + + /* new file added */ + fp = emalloc(sizeof(*fp)); + *fp = file; + *fpp = fp; + + return 0; +} + static void du(int dirfd, const char *path, struct stat *st, void *data, struct recursor *r) { off_t *total = data, subtotal; subtotal = nblks(st->st_blocks); - if (S_ISDIR(st->st_mode)) + if (S_ISDIR(st->st_mode)) { recurse(dirfd, path, &subtotal, r); + } else if (r->follow != 'P' || st->st_nlink > 1) { + if (duplicated(st->st_dev, st->st_ino)) + goto print; + } + *total += subtotal; +print: if (!r->depth) printpath(*total, r->path); else if (!sflag && r->depth <= maxdepth && (S_ISDIR(st->st_mode) || aflag))