#!/usr/bin/perl -w
#
# $Revision: 1.7 $ $Date: 2010-11-02 20:54:54-04 $
# $Source: /home/vogelke/projects/sort-logfile/RCS/sort-logfile,v $
# $Host: sys7.com $
# $UUID: 40bfe808-500c-3cdf-8b8b-f0920c01b058 $
#
#<sort-logfile: sort a LOG-style file by date

use Getopt::Long;
use File::Basename;
use Date::Manip;
use Carp;
use strict;
use diagnostics;

# Predeclare our own subs so they can be called without parentheses,
# and pin PATH to a fixed list of directories (manpage execs perldoc).
use subs qw(dbg manpage myuuid usage version where);
$ENV{'PATH'} = join ":", qw(/bin /usr/bin /usr/local/bin /opt/sfw/bin);

# Program name used in messages: basename of $0 without any extension.
my $myname = basename($0);
$myname =~ s/\.\w*$//;    # strip any extension

# Command-line switches.  All are simple flags collected in %options.
my %options;
my @getopt_args = (
    'd',            # print debugging stuff
    'h|?',          # print usage
    'm',            # print manpage
    'u',            # print UUID
    'v',            # print version
    'w',            # print source location
    );

# Switches are case-sensitive and single letters may be bundled.
# Configure() is the supported name; config() is a deprecated alias.
Getopt::Long::Configure("no_ignore_case", "bundling");
usage unless GetOptions(\%options, @getopt_args);

# Each informational switch prints something and exits.
manpage if $options{'m'};
myuuid  if $options{'u'};
version if $options{'v'};
where   if $options{'w'};

# Help was requested, or no input file was named.
usage   if $options{'h'} || !@ARGV;

# Read the input and store it as an array of anonymous hashes, one per
# log entry:
#   stamp => text of the timestamp
#   user  => remainder of the timestamp line
#   entry => the lines that follow, up to the next timestamp line

my $entry   = '';
my $stamp   = '';
my $user    = '';
my @logfile = ();

# Make regular expression for timestamp, for example
#   Tue, 2 Nov 2010 20:54:54 -0400
# Both "-" and "+" zone offsets are accepted: the stamps this script
# prints come from UnixDate's %z, which can be "+hhmm", and our own
# output has to remain usable as input.
my $reday;
my $remonth = $reday = '[A-Z][a-z]{2}';
my $rehms = '\d\d:\d\d:\d\d';
my $retimestamp = "$reday, \\d+ $remonth \\d{4} $rehms [-+]\\d{4}";

# Compile the line matcher once: $1 is the timestamp, $2 the rest of
# the line.
# NOTE(review): the pattern is not anchored, so text in front of the
# timestamp on that line is neither stored nor printed -- confirm that
# this is intended.
my $restart = qr/($retimestamp)(.*)/;

# Append the entry currently being collected to @logfile.
my $save_entry = sub {
    push @logfile, { stamp => $stamp, user => $user, entry => $entry };
};

# The first line we keep from the logfile must be a timestamp;
# echo anything else.

my $found = 0;
my $ek    = 0;    # entry counter.

while (my $line = <>) {
    chomp $line;

    if ($line =~ $restart) {
        # Copy the captures before any other code can disturb them.
        my ($newstamp, $newuser) = ($1, $2);
        $found = 1;

        if (length($stamp)) {    # previous entry is done, save it.
            dbg "ENTRY $ek end: [$stamp]\n  [$user]\n[$entry]";
            $save_entry->();
        }

        ($stamp, $user, $entry) = ($newstamp, $newuser, '');
        $ek++;
        dbg "ENTRY $ek start: [$stamp]\n   [$user]\n[$entry]";
    }
    elsif ($found) {    # Contents of a given log-entry.
        $entry .= "$line\n";
    }
    else {              # No timestamp found yet.
        print "$line\n";
    }
}

# If we hit eof and we've read at least one timestamped entry,
# it hasn't been stored yet.  If we haven't read any entries, bail.

exit(0) unless length($stamp);

dbg "ENTRY $ek (last): [$stamp]\n   [$user]\n[$entry]";
$save_entry->();

# Walk the array and compute a sort key (epoch seconds) for each entry.
# If the entry body has a Sent: line (or, failing that, a Date: line),
# that date is used in place of the one on the timestamp line;
# otherwise the original timestamp is used.  The stored stamp is
# rewritten in one uniform format, and %permute maps each sort key to
# the index of its entry in @logfile.

my $k;
my $dobj;
my $str;
my $sec;
my %permute;

# Report the number of entries; $#logfile is the last index, which is
# one less than the count.
dbg scalar(@logfile) . " entries found";

my $last_sec = 0;    # sort key given to the previous entry

for $k (0 .. $#logfile) {
    my $body = $logfile[$k]{'entry'};
    $stamp = $logfile[$k]{'stamp'};

    # With /m, ^ also matches at the very start of the body, so a
    # header on the first body line is not missed.
    if ($body =~ /^\s*Sent:\s+(.*)\n/m) {
        $str = $1;
        dbg "  entry $k: found Sent: line";
    }
    elsif ($body =~ /^\s*Date:\s+(.*)\n/m) {
        $str = $1;
        dbg "  entry $k: found Date: line";
    }
    else {
        $str = $stamp;
        dbg "  entry $k: found regular timestamp";
    }

    # A Sent:/Date: value that cannot be parsed must not cost us the
    # entry: fall back to the timestamp line it was filed under.
    $dobj = ParseDate($str);
    if (!$dobj && $str ne $stamp) {
        warn "ParseDate failed for [$str], using [$stamp]\n";
        $dobj = ParseDate($stamp);
    }

    if ($dobj) {
        $logfile[$k]{'stamp'} = UnixDate($dobj, "%a, %d %b %Y %T %z");
        $sec                  = UnixDate($dobj, "%s");
    }
    else {
        # Nothing usable at all.  Keep the entry (stamp untouched) right
        # after the previous one rather than dropping it from the output.
        warn "ParseDate failed for [$stamp], keeping entry in place\n";
        $sec = $last_sec;
    }

    # If we have multiple entries with the same timestamp (which can
    # happen with Exchange dates), keep adding a second until we get
    # a slot for a unique entry.
    while (exists $permute{$sec}) {
        $sec++;
    }
    $permute{$sec} = $k;
    $last_sec      = $sec;
    dbg "  entry $k: seconds = [$sec]";
}

# Write the entries out in ascending order of their %permute keys.
# Each entry loses its trailing newlines, and a leading zero on the
# day of the month is dropped from the stamp.

my @order = sort numeric keys %permute;

if ($options{'d'}) {
    dbg "\nPERMUTE:";
    dbg "$_: $permute{$_}" for @order;
}

for my $when (@order) {
    $k = $permute{$when};

    ($entry = $logfile[$k]{'entry'}) =~ s/\n+$//;
    ($stamp = $logfile[$k]{'stamp'}) =~ s/, 0/, /;

    print $stamp, $logfile[$k]{'user'}, "\n$entry\n\n\n";
}

exit(0);

#---------------------------------------------------------------------
# Numeric sort, debugging.

# Write the arguments plus a newline to STDERR, but only when the
# -d switch was given.
sub dbg {
    if ($options{'d'}) {
        warn @_, "\n";
    }
}

# Comparison routine for sort(): ascending numeric order.
sub numeric {
    return $a <=> $b;
}

#---------------------------------------------------------------------
# Print a usage message from the comments and exit.

sub usage {
    use Pod::Usage qw(pod2usage);

    # Optional first argument: an error message to show before the
    # usage text.
    my $emsg = shift;
    if (defined $emsg) {
        warn "$emsg\n";
    }

    # Show only the NAME, SYNOPSIS and OPTIONS sections of the POD.
    pod2usage(-sections => "NAME|SYNOPSIS|OPTIONS", -verbose => 99);
}

# Replace this process with perldoc showing this script's own POD.
sub manpage {
    my $viewer = "perldoc";

    # The indirect-object form of exec runs the program directly.
    exec { $viewer } ($viewer, "$0");
    die("should not get here\n");
}

#---------------------------------------------------------------------
# Print the UUID, current version, or source location.

# Print the UUID embedded in the keyword string below, then exit.
sub myuuid {
    # Take the capture through a list-context match.  The former
    # "my $x = $1 if ..." form has undefined behaviour per perlsyn.
    my ($UUID) =
      q$UUID: 380fe1b4-ba7c-37f3-941e-3cb34d2c27b8 $ =~ /UUID: (.*) /;
    print "$UUID\n";
    exit(0);
}

# Print "<program name> <version> <date>" taken from the RCS keyword
# strings below, then exit.
sub version {
    # major.minor from the Revision keyword, minor padded to 2 digits.
    my $VERSION = sprintf("%d.%02d", q$Revision: 1.7 $ =~ /(\d+)\.(\d+)/);

    # Take the capture through a list-context match.  The former
    # "my $x = $1 if ..." form has undefined behaviour per perlsyn.
    my ($DATE) = q$Date: 2010-11-02 20:54:54-04 $ =~ /Date: (.*) /;

    print "$myname $VERSION $DATE\n";
    exit(0);
}

# Print the source location as a file:// URL built from the Host and
# Source keyword strings below, then exit.
sub where {
    # Take the captures through list-context matches.  The former
    # "my $x = $1 if ..." form has undefined behaviour per perlsyn.
    my ($SOURCE) =
      q$Source: /home/vogelke/projects/sort-logfile/RCS/sort-logfile,v $ =~ /Source: (.*) /;
    my ($HOST) = q$Host: sys7.com $ =~ /Host: (.*) /;

    print "file://$HOST", "$SOURCE\n";
    exit(0);
}

#---------------------------------------------------------------------
__END__

=head1 NAME

sort-logfile - sort LOG file entries by date

=head1 SYNOPSIS

sort-logfile [-dhmuvw] log [log ...]

=head1 OPTIONS

=over 4

=item B<-d>

Print debugging output.

=item B<-h>

Print a brief help message and exit.

=item B<-m>

Print the manual page and exit.

=item B<-u>

Print the script UUID and exit.

=item B<-v>

Print the version and exit.

=item B<-w>

Print the source location and exit.

=back

=head1 DESCRIPTION

B<sort-logfile> will read something that vaguely resembles a LOG-type file
and try to sort the entries by date.  Intended for use when entering a
collection of Outlook-style email messages into a log.

=head1 AUTHOR

 Karl Vogel <vogelke+unix@pobox.com>
 Oasis Systems, Inc.

=cut
