#!/usr/bin/perl -w
#
# $Revision: 1.4 $ $Date: 2012-07-26 17:11:47-04 $
# $Source: /home/vogelke/bin/RCS/file2log,v $
# $Host: sys7.com $
# $UUID: 6e3b231f-a645-3dd6-8cb4-7dcb441a9381 $
#
#<file2log: print files holding email in date-sorted order like a LOG.

my $rcsid  = '$Id: file2log,v 1.4 2012-07-26 17:11:47-04 vogelke Exp $';
my $rcssrc = '$Source: /home/vogelke/bin/RCS/file2log,v $';

use strict;
use Getopt::Long;
use Pod::Usage;
use File::Basename;
use Date::Parse;
use Date::Format qw(time2str);

# Comparator for sort(): orders the pair under comparison ($a, $b)
# numerically, smallest first.
sub numeric {
    return $a <=> $b;
}

# Run with a fixed search path instead of whatever the caller exported.
$ENV{'PATH'} = join ":", qw(/bin /usr/bin /usr/local/bin);

# Name used in the version message: basename of $0 without a trailing
# ".ext" suffix.
my $myname = basename($0);
$myname =~ s/\.\w*$//;        # strip any extension

# Switches accepted on the command line.  GetOptions stores each value
# in %options under the first name of its spec (h, m, v).
my %options;
my @getopt_args = (
    'h|?',                    # print usage
    'm',                      # print manpage
    'v',                      # print version
    );

# Configure() is the supported spelling; config() is its deprecated alias.
# Option names are case-sensitive and single-letter options may be bundled.
Getopt::Long::Configure("noignorecase", "bundling");
usage() unless GetOptions(\%options, @getopt_args);

# Each of these helpers terminates the program, so at most one runs.
manpage() if $options{'m'};
version() if $options{'v'};
usage()   if $options{'h'} || !@ARGV;    # at least one file is required

#
# Read each file, looking for a date somewhere in the header.
# Go through the list, get each date, and make another list which
# holds the proper order for printing.
#

# Epoch time => reference to the list of paths carrying that time.
# Keeping a real list per timestamp (instead of a delimiter-joined string)
# means filenames containing spaces or "=" survive intact.
my %filelist;

# Timestamp assigned to every file in which no usable date is found.
# Taken once so all undated files share the same heading.
my $now = time();

foreach my $path (@ARGV) {
    my $stamp = find_message_time($path);
    $stamp = $now unless defined $stamp;
    push @{ $filelist{$stamp} }, $path;
}

#
# Print the contents of each file in date-sorted order, each one preceded
# by a heading line holding its date.  Files sharing a timestamp are
# printed in the order they were given on the command line.
#

foreach my $stamp (sort numeric keys %filelist) {
    # The heading depends only on the timestamp, so format it once.
    my $heading = time2str("%a, %e %b %Y %T %z", $stamp);

    foreach my $path (@{ $filelist{$stamp} }) {
        print "\n$heading\n\n";

        # Three-argument open: the path is always treated as a plain
        # filename, never as a mode prefix or a command to run.
        open(my $ifh, '<', $path) || die "$path: $!\n";
        while (my $line = <$ifh>) {
            print $line;
        }
        close($ifh);
    }
}

exit(0);

#---------------------------------------------------------------------
# Return the epoch time parsed from the first line of $path that starts
# (after optional leading whitespace) with "Date:" or "Sent:".
# Returns undef if there is no such line or str2time cannot parse it.
# Dies with "$path: <reason>" if the file cannot be opened.

sub find_message_time {
    my ($path) = @_;
    my $stamp;

    open(my $ifh, '<', $path) || die "$path: $!\n";

    while (my $line = <$ifh>) {
        chomp $line;
        $line =~ s/^\s+//;

        if ($line =~ /^Date:|^Sent:/) {
            $line =~ s/^....://;    # drop the 5-character "Date:"/"Sent:" tag
            $stamp = str2time($line);

            # Only the first matching line is considered, parsable or not.
            last;
        }
    }

    close($ifh);
    return $stamp;
}

#---------------------------------------------------------------------
# Print a usage message generated from this file's POD and exit.
# If an error message is passed in, it is sent to stderr via warn first.

sub usage {
    my $emsg = shift;

    require Pod::Usage;
    Pod::Usage->import(qw(pod2usage));

    if (defined $emsg) {
        warn "$emsg\n";
    }

    pod2usage(-verbose => 1);
}

# Show the full manual page (pod2usage at verbosity 2) and finish with
# exit status 0.
sub manpage {
    require Pod::Usage;
    Pod::Usage->import(qw(pod2usage));

    my %pod_args = (
        -exitstatus => 0,
        -verbose    => 2,
    );
    pod2usage(%pod_args);
}

#---------------------------------------------------------------------
# Print the program name plus the revision, date and time fields taken
# from the RCS $Id$ string, then exit with status 0.

sub version {
    # Work on a copy; turn "name,v " into "name " so the fields split cleanly.
    (my $id = $rcsid) =~ s/,v / /;

    # Whitespace-separated fields: 0 = '$Id:', 1 = file name,
    # 2 = revision, 3 = date, 4 = time.
    my (undef, undef, $revision, $date, $time) = split ' ', $id;

    print "$myname  v$revision  $date $time\n";
    exit(0);
}

#---------------------------------------------------------------------
__END__

=head1 NAME

file2log - print files holding email in date-sorted order, like a LOG

=head1 SYNOPSIS

file2log [-hmv] file [file...]

=head1 OPTIONS

=over 4

=item B<-h>

Print a brief help message and exit.

=item B<-m>

Print the manual page and exit.

=item B<-v>

Print the version and exit.

=back

=head1 DESCRIPTION

B<file2log> reads one or more files generated by "csplit".
Outlook writes messages in any order it likes, usually separated by
a row of underscores.  To break up those files into separate messages,
you can use something like this:

 csplit some-filename '/^____/' '{*}'

Each file is assumed to contain a date on a line starting with "Date:"
or "Sent:".  If such a date is found and can be parsed, it is printed
above the file's contents as "weekday, dd mmm yyyy hh:mm:ss zone".

The files are sorted by date, and their contents are written to stdout
in a format roughly corresponding to a LOG-type file.
Files with no usable date are stamped with the current time, so they are
normally written last.

=head1 AUTHOR

 Karl Vogel <vogelke@pobox.com>
 Sumaria Systems, Inc.

=cut
