commit e8fe04c543eab13e892beda05cb9bccb9e4f441e
parent 39a4c55378294437627421571a51b64bd5e09623
Author: remph <lhr@disroot.org>
Date: Mon, 3 Mar 2025 19:52:37 +0100
du: Dedup hardlinks
Conform to POSIX, which says `Files with multiple links shall be counted
and written for only one entry,' in the 2008[1] and 2013[2] editions, and
uses more words to say the same thing in the 2017[3] and 2024[4] editions.
This patch also remembers already-counted inodes across operands and, when
symlinks are followed, dedups them too; both behaviours are
implementation-defined in the 2017 edition and required in the 2024 edition.
See also the `RATIONALE' section in the 2024 edition.
[1] https://pubs.opengroup.org/onlinepubs/9699919799.2008edition/utilities/du.html
[2] https://pubs.opengroup.org/onlinepubs/9699919799.2013edition/utilities/du.html
[3] https://pubs.opengroup.org/onlinepubs/9699919799/utilities/du.html
[4] https://pubs.opengroup.org/onlinepubs/9799919799/utilities/du.html
Diffstat:
M | du.c | | | 53 | ++++++++++++++++++++++++++++++++++++++++++++++++++++- |
1 file changed, 52 insertions(+), 1 deletion(-)
diff --git a/du.c b/du.c
@@ -5,6 +5,7 @@
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
+#include <search.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
@@ -20,6 +21,11 @@ static int aflag = 0;
static int sflag = 0;
static int hflag = 0;
+struct file { /* identity of a file that has been seen; key type compared by cmp() */
+ dev_t devno; /* device number; duplicated() fills this from its dev argument */
+ ino_t inode; /* inode number; duplicated() fills this from its ino argument */
+};
+
static void
printpath(off_t n, const char *path)
{
@@ -35,16 +41,61 @@ nblks(blkcnt_t blocks)
return (512 * blocks + blksize - 1) / blksize;
}
+static int /* three-way comparison of two struct file keys; passed to tsearch() by duplicated() */
+cmp(const void *p1, const void *p2) /* returns -1, 0 or 1; 0 only when both devno and inode match */
+{
+ const struct file *f1 = p1, *f2 = p2;
+
+ if (f1->devno > f2->devno) /* NOTE(review): larger devno sorts first, the reverse of the inode test below; the order is still consistent, so equality detection is unaffected */
+ return -1;
+ if (f1->devno < f2->devno)
+ return 1;
+
+ /* f1->devno == f2->devno */
+ if (f1->inode < f2->inode) /* same device: smaller inode sorts first */
+ return -1;
+ if (f1->inode > f2->inode)
+ return 1;
+
+ return 0; /* same device and same inode */
+}
+
+static int /* returns 1 if (dev, ino) was recorded by an earlier call, otherwise records it and returns 0 */
+duplicated(dev_t dev, ino_t ino)
+{
+ static void *tree; /* tsearch() root; static, so recorded entries persist across calls */
+ struct file **fpp, *fp, file = {dev, ino}; /* file is a stack-allocated probe key */
+
+ if ((fpp = tsearch(&file, &tree, cmp)) == NULL) /* finds an equal key or inserts &file; NULL when no node could be allocated */
+ eprintf("%s:", argv0); /* eprintf() is defined outside this view; presumably reports the error and exits, since fpp is dereferenced next -- confirm */
+
+ if (*fpp != &file) /* node holds a different pointer: an equal key was already in the tree */
+ return 1;
+
+ /* new file added */
+ fp = emalloc(sizeof(*fp)); /* emalloc() is defined outside this view; presumably does not return on failure -- confirm */
+ *fp = file;
+ *fpp = fp; /* swap the probe for the heap copy so the tree never keeps a pointer into this stack frame */
+
+ return 0;
+}
+
static void
du(int dirfd, const char *path, struct stat *st, void *data, struct recursor *r)
{
off_t *total = data, subtotal;
subtotal = nblks(st->st_blocks);
- if (S_ISDIR(st->st_mode))
+ if (S_ISDIR(st->st_mode)) {
recurse(dirfd, path, &subtotal, r);
+ } else if (r->follow != 'P' || st->st_nlink > 1) {
+ if (duplicated(st->st_dev, st->st_ino))
+ goto print;
+ }
+
*total += subtotal;
+print:
if (!r->depth)
printpath(*total, r->path);
else if (!sflag && r->depth <= maxdepth && (S_ISDIR(st->st_mode) || aflag))