#!/usr/bin/perl -w
#< file2hash: print MD4 signature for files or stdin

use Getopt::Long;
use Pod::Usage;
use File::Basename;
use Digest::MD4;
use strict;

# Use a fixed search path for the external commands run by manpage().
$ENV{'PATH'} = join ":", qw(/bin /usr/bin /usr/local/bin /opt/sfw/bin);

my $myname = basename($0);
$myname =~ s/\.\w*$//;    # strip any extension

my %options;
my @getopt_args = (
    'h|?',    # print usage
    'm',      # print manpage
    'u',      # print UUID
    'v',      # print version
    'w',      # print source location
);

Getopt::Long::Configure("no_ignore_case", "bundling");
usage() unless GetOptions(\%options, @getopt_args);

# Each of these informational options prints something and exits.
manpage() if $options{'m'};
myuuid()  if $options{'u'};
version() if $options{'v'};
where()   if $options{'w'};
usage()   if $options{'h'};

# Display the signature for any requested files; with no file
# arguments, read stdin (spelled "-").
push(@ARGV, '-') unless @ARGV;

for my $file (@ARGV) {
    my $md = makesig($file);
    print "$md $file\n" if $md;
}

exit(0);

# --------------------------------------------------------------------
# saferead($file): opens a file (or stdin) for reading.
#
#   $file    pathname to open, or "-" for standard input
#
# Returns a read filehandle, or nothing (undef in scalar context) after
# printing a warning when the open fails.

sub saferead {
    my $sname = (caller(0))[3];
    my $file  = shift;
    my $fh;

    if ($file eq '-') {
        # Duplicate stdin rather than using STDIN directly, so closing
        # the returned handle does not close STDIN itself.
        unless (open($fh, '<&', \*STDIN)) {
            warn "$sname: can't dup stdin: $!\n";
            return;
        }
    }
    else {
        # Three-argument open: the filename is never parsed for a mode.
        unless (open($fh, '<', $file)) {
            warn "$sname: can't read $file: $!\n";
            return;
        }
    }

    return $fh;
}

# --------------------------------------------------------------------
# makesig($file): computes $file digest, appends file-size.
#
#   $file    pathname to hash, or "-" for standard input
#
# Returns the MD4 hex digest of the contents immediately followed by the
# size reported by stat(), formatted as hex.  Returns nothing when the
# file cannot be opened, is neither a regular file nor a pipe, or cannot
# be read.

sub makesig {
    my $sname = (caller(0))[3];
    my $file  = shift;

    my $fh = saferead($file);
    return unless $fh;

    # Anything other than a regular file or a pipe is skipped silently.
    return unless (-f $fh || -p $fh);

    # The size is whatever stat() reports for the open handle.
    my $size = (stat($fh))[7];

    # Hash raw bytes, with no newline translation or other I/O layers.
    binmode($fh);

    my $digest = Digest::MD4->new;
    eval { $digest->addfile($fh) };
    if ($@) {
        warn "$sname: sum $file: $!\n";
        return;
    }

    close($fh);

    # NOTE: "%8.8x" yields at least eight hex digits; a size of 4 GiB or
    # more produces a longer string.
    return $digest->hexdigest . sprintf("%8.8x", $size);
}

#---------------------------------------------------------------------
# Print a usage message from the comments and exit.
sub usage {
    # usage($emsg): print the optional message $emsg on stderr, then
    # hand the NAME, SYNOPSIS and OPTIONS sections of this file's POD
    # to pod2usage() for display.
    my ($emsg) = @_;
    use Pod::Usage qw(pod2usage);

    warn "$emsg\n" if defined $emsg;
    pod2usage(-verbose => 99, -sections => "NAME|SYNOPSIS|OPTIONS");
}

sub manpage {
    # manpage(): format this file's POD as a manual page and page it.
    # STDOUT is reopened as a pipe to the formatter/pager pipeline, so
    # everything Pod::Man writes goes through groff and less.
    use Pod::Man();

    my $parser = Pod::Man->new();

    # The command string contains shell pipes, so it is run by the shell.
    open(STDOUT, '|-', "groff -T ascii -man | gcat -s | less")
        or die "$myname: can't start groff pipeline: $!\n";

    $parser->parse_from_file($0);

    # Closing the pipe waits for the pipeline to finish.
    close(STDOUT) or die "$myname: can't close stdout: $!\n";

    $? = 1 if $? == 255;    # from die
    exit($?);
}

#---------------------------------------------------------------------
# Print the UUID, current version, or source location.

sub myuuid {
    # myuuid(): print the UUID embedded in the keyword string and exit.
    my $UUID = sprintf("%s",
        q$UUID: 5ba9072d-f664-378e-9666-2fa6d1544870 $ =~ /UUID: (.*) /);
    print "$UUID\n";
    exit(0);
}

sub version {
    # version(): print the program name, revision and date taken from
    # the embedded keyword strings, then exit.
    my $VERSION = sprintf("%d.%02d",
        q$Revision: 1.4 $ =~ /(\d+)\.(\d+)/);
    my $DATE = sprintf("%s",
        q$Date: 2008/06/11 23:48:30 $ =~ /Date: (.*) /);
    print "$myname $VERSION $DATE\n";
    exit(0);
}

sub where {
    # where(): print the source location and exit.
    #
    # NOTE(review): the keyword string that originally supplied the
    # source path is missing from this copy of the script.  An
    # unexpanded Source keyword is used here (presumably what the
    # revision-control system fills in -- confirm); until it is
    # expanded the match fails and the script's own path ($0) is
    # printed instead.
    my ($SOURCE) = q$Source$ =~ /Source: (.*) /;
    $SOURCE = $0 unless defined $SOURCE;
    print "$SOURCE\n";
    exit(0);
}

#---------------------------------------------------------------------
__END__

=head1 NAME

file2hash - print a unique signature for files

=head1 SYNOPSIS

file2hash [-hmuvw] [file ...]

=head1 OPTIONS

=over 4

=item B<-h>

Print a brief help message and exit.

=item B<-m>

Print the manual page and exit.

=item B<-u>

Print the script UUID and exit.

=item B<-v>

Print the version and exit.

=item B<-w>

Print the source location and exit.

=back

=head1 DESCRIPTION

B<file2hash> will read stdin or the given input file(s).  Output goes to
stdout, and consists of a unique signature followed by the filename (or
"-" for stdin).

The signature is 40 hex characters: the MD4 hash of the contents followed
by the file size.  Only regular files are printed.

=head1 NOTES

Always use the three-argument version of open(); our filenames have all
sorts of weird characters in them which need to be protected.

We have several million files that are stored according to their hash
value, so a fast signature method was needed.
Here are some benchmarks; first time is wall-clock seconds.

    Benchmark: timing 20 iterations of haval, md4, sha256, sha512...
         haval:  3 secs ( 2.92 usr +  0.14 sys =  3.06 CPU) @  6.54/s (n=20)
           md4:  1 secs ( 0.43 usr +  0.10 sys =  0.53 CPU) @ 37.74/s (n=20)
        sha256:  2 secs ( 2.74 usr +  0.11 sys =  2.85 CPU) @  7.02/s (n=20)
        sha512:  6 secs ( 5.97 usr +  0.12 sys =  6.09 CPU) @  3.28/s (n=20)

=head1 AUTHOR

Karl Vogel
Sumaria Systems, Inc.

=cut