#!/usr/bin/perl -w
# log2blog: translate LOG file into weblog entries.

my $rcsid  = '$Id: log2blog,v 1.1 2006/05/21 23:37:11 vogelke Exp $';

use Getopt::Long;
use Pod::Usage;
use File::Basename;
use Regexp::Common qw /URI/;
use Time::ParseDate;
use Carp;
use strict;
use diagnostics;
use subs qw/prep usage version/;

# Use a fixed, minimal PATH instead of whatever the caller exported.
$ENV{'PATH'} = join ":", qw(/bin /usr/bin /usr/local/bin);

# Script name without directory or extension; printed by version().
my $myname = basename($0);
$myname =~ s/\.\w*$//;   # strip any extension

#
# Command-line options.
#

my %options;
my @getopt_args = (
    'h|?',    # print usage
    'm',      # print manpage
    'v',      # print version
    'u=s',    # output format (accepted, but not consulted in this file)
    );

# Configure() is the documented name; config() is only a deprecated alias.
# Option letters are case-sensitive and may be bundled (e.g. "-hv").
Getopt::Long::Configure("no_ignore_case", "bundling");
usage() unless GetOptions(\%options, @getopt_args);

# Each of these helpers exits, so at most one of them runs.
manpage() if $options{'m'};
version() if $options{'v'};
usage()   if $options{'h'} || !@ARGV;   # at least one LOG file is required

#
# Real work starts here.
#

# Read the LOG file(s) named on the command line and write one output
# file per dated entry.  Each entry file is named "<epoch>.txt", holds
# the entry title on its first line and the converted body after it.

my $wasdated = 0;    # was the previous line a dated entry?
my $ofh = undef;     # output filehandle for the entry being written.
my $outfile = '';    # name of the file behind $ofh, for error messages.

TOP: while (<>) {
    chomp;

    # ignore section header
    next TOP if /^BEGINNING OF LOG FOR\s+.*\s+==*$/;

    # begin dated entry: "date<TAB>title"
    if (/^[0-9A-Z]/) {
        # Buffered write errors only show up at close, so check it.
        if (defined($ofh)) {
            close($ofh) or die "$outfile: can't close: $!\n";
        }

        my ($date, $title) = split(/\t/);
        $title = '' unless defined $title;   # entry line without a tab

        my $d = parsedate($date);
        die "$date: can't parse\n" unless $d;

        $outfile = "$d.txt";
        open($ofh, '>', $outfile) or die "$outfile: can't write: $!\n";
        print $ofh "$title\n";
        $wasdated = 1;
        next TOP;
    }

    # Everything below writes to the current entry.  Before the first
    # dated entry there is nowhere to write: skip blank lines and reject
    # real text with a clear message instead of failing inside print.
    unless (defined($ofh)) {
        next TOP if /^\s*$/;
        die sprintf("%s: line %d: text found before the first dated entry\n",
                    $ARGV, $.);
    }

    # preformatted section; leave it alone except
    # for command line prompts "me%", "user%" and "root#".
    if (/---------S$/) {
        my $ended = 0;   # did we see the closing marker?

        print $ofh "<pre>\n";
        while (<>) {
            chomp;
            $_ = prep ($_);
            s!^    !!;
            s!me\%!<b>me%</b>!;
            s!user\%!<b>user%</b>!;
            s!root#!<b>root#</b>!;

            if (/---------E$/) {
                $ended = 1;
                last;
            }
            print $ofh "$_\n";
        }
        print $ofh "</pre>\n";
        $wasdated = 0;

        # If the input ran out inside the section, stop here: calling <>
        # again after end-of-input would start reading from STDIN.
        last TOP unless $ended;
        next TOP;
    }

    # blank line: keep it, except directly after the title line.
    if (/^$/) {
        print $ofh "\n" unless $wasdated;
        next TOP;
    }

    # ordinary text: strip the indent and keep 2 spaces after periods.
    $wasdated = 0;
    $_ = prep ($_);
    s!^\s+!!;
    s!\.  !.\&nbsp;\&nbsp;!g;

    print $ofh "$_\n";
}

if (defined($ofh)) {
    close($ofh) or die "$outfile: can't close: $!\n";
}
exit (0);

#---------------------------------------------------------------------
# Print an optional error message on STDERR, then hand over to
# pod2usage() to show the usage text taken from this file's POD.
#
#   $emsg   optional message to print before the usage text.

sub usage {
    my $emsg = shift;

    require Pod::Usage;
    Pod::Usage->import(qw(pod2usage));

    if (defined $emsg) {
        warn "$emsg\n";
    }
    pod2usage(-verbose => 1);
}

# Show the complete manual page from this file's POD via pod2usage(),
# requesting exit status 0.
sub manpage {
    require Pod::Usage;
    Pod::Usage->import(qw(pod2usage));

    pod2usage(
        -exitstatus => 0,
        -verbose    => 2,
    );
}

#---------------------------------------------------------------------
# Print the script name plus the revision, date and time fields taken
# from the RCS id string, then exit with status 0.

sub version {
    # Drop the ",v" suffix from the file name, then split the id string
    # on whitespace; fields 2..4 are the revision, date and time.
    (my $id = $rcsid) =~ s/,v / /;
    my @field = split ' ', $id;

    print "$myname  v$field[2]  $field[3] $field[4]\n";
    exit(0);
}

#---------------------------------------------------------------------
# Prepare one line of LOG text for HTML output.
#
# Takes the line as its only argument and returns a converted copy:
#   - each run of tabs is replaced by spaces up to a 5-column tab stop;
#   - "&", "<" and ">" are turned into HTML entities;
#   - "[URL:...]" markers and bare http/https/ftp URLs become links;
#   - a line holding nothing but an HTTP URI is wrapped in <br> tags.

sub prep
{
    my $text = shift;

    # Expand the leftmost run of tabs until none are left.  $1 is the
    # tab-free text in front of the run, so its length gives the column.
    1 while $text =~
        s/^([^\t]*)(\t+)/$1 . (' ' x (length($2) * 5 - length($1) % 5))/e;

    # Special characters?  "&" must go first so that the entities
    # produced for "<" and ">" are not escaped a second time.
    $text =~ s!\&!\&amp;!g;
    $text =~ s!<!\&lt;!g;
    $text =~ s!>!\&gt;!g;

    # Test for lines containing only URIs (before any markup is added).
    my $addbr = 0;
    $text =~ /^\s*$RE{URI}{HTTP}\s*$/ and $addbr = 1;

    # URIs: handle "[URL:...]" markers and bare URLs in a single pass.
    # Separate passes would re-match the URL inside a link that was just
    # generated and wrap it again; the non-greedy capture keeps two
    # markers on one line from being merged into one link.
    $text =~ s{\[URL:(.*?)\]|((?:https?|ftp)://\S+)}
              {my $url = defined $1 ? $1 : $2; qq(<a href="$url">$url</a>)}ge;

    # Add breaks around lines containing only URIs.
    $addbr and $text = "<br>$text<br>";

    return $text;
}

#---------------------------------------------------------------------
__END__

=head1 NAME

log2blog - translate LOG file into weblog entries.

=head1 SYNOPSIS

log2blog [-hmv] logfile

=head1 OPTIONS

=over 4

=item B<-h>

Print a brief help message and exit.

=item B<-m>

Print the manual page and exit.

=item B<-v>

Print the version and exit.

=back

=head1 DESCRIPTION

B<log2blog> will read the LOG file and write a series of separate Blosxom-style
log entries in the current directory.  Each entry will be named using the Unix
epoch time, with a .txt extension.

=head1 AUTHOR

 Karl Vogel <vogelke@pobox.com>
 Sumaria Systems, Inc.

=cut