#!/usr/bin/perl -w #) { chomp; # The format should be "HASH-VALUE FILENAME". # The first space is the field separator. $sep = index($_, ' ') + 1; die "no separator: line $.\n" if $sep <= 0; $nhash = substr($_, 0, $sep); $npath = substr($_, $sep); $npath =~ s/^\s*//; if ($ohash eq $nhash) { print "[$opath] --> [$npath]\n" unless $options{'q'}; unless ($options{'d'}) { unlink($npath) || warn("$npath unlink failed\n"); link($opath, $npath) || warn("link ($opath, $npath) failed\n"); } } $ohash = $nhash; $opath = $npath; } exit(0); #--------------------------------------------------------------------- # Print a usage message from the comments and exit. sub usage { my ($emsg) = @_; use Pod::Usage qw(pod2usage); warn "$emsg\n" if defined $emsg; pod2usage(-verbose => 99, -sections => "NAME|SYNOPSIS|OPTIONS"); } sub manpage { my @args = ("perldoc", "$0"); exec { $args[0] } @args; # safe even with one-arg list die("should not get here\n"); } #--------------------------------------------------------------------- # Print the UUID, current version, or source location. sub myuuid { my $UUID = sprintf("%s", q$UUID: b79d686c-8162-3058-b027-1a399279664c $ =~ /UUID: (.*) /); print "$UUID\n"; exit(0); } sub version { my $VERSION = sprintf("%d.%02d", q$Revision: 1.5 $ =~ /(\d+)\.(\d+)/); my $DATE = sprintf("%s", q$Date: 2010-11-02 17:50:12-04 $ =~ /Date: (.*) /); print "$myname $VERSION $DATE\n"; exit(0); } sub where { my $SOURCE = sprintf("%s", q$Source: /home/vogelke/bin/RCS/linkdups,v $ =~ /Source: (.*) /); print "$SOURCE\n"; exit(0); } #--------------------------------------------------------------------- __END__ =head1 NAME linkdups - hard-link identical files to save diskspace =head1 SYNOPSIS linkdups [-dhmquvw] [hashfile] =head1 OPTIONS =over 4 =item B<-d> Debug: prints what would be done without doing it. =item B<-h> Print a brief help message and exit. =item B<-m> Print the manual page and exit. =item B<-q> Don't print filenames being linked. =item B<-u> Print the script UUID and exit. =item B<-v> Print the version and exit. =item B<-w> Print the source location and exit. =back =head1 DESCRIPTION B reads sorted hashes (MD5, RMD160, etc.) for regular files in the same filesystem, looks for duplicate entries, and makes hard-links to save space. The hash-file format should be "HASH-VALUE (one or more spaces) FILENAME". =head1 EXAMPLE root# cd /some/place root# find . -size +70000 -print0 | xargs -0 md5sum | sort > /tmp/hash root# cat /tmp/hash f9203cea58151e423367d147bd334242 ./a/b/c/summary.pdf f9203cea58151e423367d147bd334242 ./d/e/f/report.pdf ... Then, later on: root# cd /some/place root# linkdups /tmp/hash linking ./a/b/c/summary.pdf ./d/e/f/report.pdf ... =head1 AUTHOR Karl Vogel Sumaria Systems, Inc. =cut