#!/usr/bin/perl -w
#
# $Revision: 1.2+3 $ $Date: 2010-04-20 14:14:42-04 $
# $Source: /home/vogelke/bin/RCS/idxcache2html,v $
# $Host: sys7.com $
# $UUID: b124f631-9120-3472-a0bb-37831b844e3e $
#
#<idxcache2html: read WN index.cache file, write index.htm
# usage: idxcache2html [cache-file [url]]
# Full pathname of index.cache file is used for the title.
# It's also used for breadcrumbs in the generated HTML unless URL
# is specified.

use File::Basename;
use strict;

my ($myname) = basename($0, ".pl");
my $title;

# Store "file=" lines from index.cache.  We could do
# line-at-a-time processing, but we might want to simply
# pass everything to a template.

my $cache = shift || 'index.cache';
my $url   = shift || '';
my @str = storecache($cache);

# Make "created" and "expires" headers based on the modtime
# of the cache file.  Doc should expire in two years.

$_ = modtime($cache);
my $created = gmdate($_);

$_ += 365 * 86400 * 2;
my $expires = gmdate($_);

# Where are we?
#   $cache = /doc/html/htdocs/one/two/three/index.cache
#
# s1: lose the index.cache, we don't need it any more.
# s2: keep "three" for the <title> tag.
# s3: keep the remaining directories for the breadcrumbs, i.e.
#     "Home / one / two / three", *unless* URL has been passed in.

$cache    = dirname($cache);        # s1
my $where = basename($cache);       # s2
my $bc    = breadcrumbs(length($url) ? $url : $cache);    # s3

# Print the HTML header, title, breadcrumbs, and table start.
header($where, $created, $expires, $bc);

# Print the listed files.
my ($bytes, $class, $f, $ftime, $img, $mt, $size, $t);
my $kbyte   = 1024;
my $mbyte   = $kbyte * $kbyte;
my $count   = 0;
my @a       = ();

foreach (@str) {
    my ($ftype, $finfo) = split (/\t/);

    foreach (split(/\&/, $finfo)) {
        $f = $1 if /^file=\s*(.*)/;
        $t = $1 if /^title=\s*(.*)/;
    }

    next if $f =~ /^index.htm/;
    $t = '' if $t =~ /^File /;

    # Get file size.
    ($bytes, $mt) = (stat("$cache/$f"))[7, 9];

    if ($bytes >= $mbyte) {
        $size = sprintf("%.1f", $bytes/$mbyte) . 'M';
    }
    elsif ($bytes >= $kbyte && $bytes < $mbyte) {
        $size = int($bytes / $kbyte) . 'k';
    }
    elsif ($bytes < 512) {
        $size = $bytes;
    }
    else {
        $size = '1k';
    }
    $size = sprintf("%4s", $size);

    # Get file type.
    $ftime = filetime($mt);
    $img = icon($ftype);

    # Print the table entry.
    $count++;
    $class = $count % 2 ? '' : 'class="dark"';
    $url = urlencode($f);

    print <<"EndEntry";
<tr>
 <td $class>$img</td>
 <td $class><a href="$url">$f</a></td>
 <td $class>$ftime</td>
 <td $class align="right">$size</td>
 <td $class>$t</td>
</tr>
EndEntry
}

# Print the footer.
footer();
exit(0);

#---------------------------------------------------------------------
# Escape non-HTML-safe characters.

sub htmlsafe {
    my $text = $_[0];
    return undef unless defined $text;

    # do it in this order, or you get lots of &...
    $text =~ s/\&/&amp;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/</&lt;/g;
    $text;
}

#---------------------------------------------------------------------
# Return the current version.

sub version {
    my $VERSION = sprintf("%d.%02d", q$Revision: 1.2 $ =~ /(\d+)\.(\d+)/);
    my $DATE =
      sprintf("%s", q$Date: 2010-04-20 14:14:42-04 $ =~ /Date: (.*) /);
    return "$myname $VERSION $DATE";
}

#---------------------------------------------------------------------
# Accept an optional date in total seconds since the epoch,
# and return it in HTTP Created/Expires format:
#       Wed, 20 Jan 2010 02:14:57 GMT

sub gmdate {
    use POSIX qw(strftime);
    my $t = shift || time();
    return strftime("%a, %d %b %Y %T GMT", gmtime($t));
}

#---------------------------------------------------------------------
# Accept an optional date in total seconds since the epoch,
# and write it in ARPA-mail standard form:
#       Fri, 28 Mar 1997 15:40:57 -0500

sub arpadate {
    use POSIX qw(strftime);
    my $t = shift || time();
    return strftime("%a, %d %b %Y %T %z", localtime($t));
}

#---------------------------------------------------------------------
# Create breadcrumbs link to a given page.
# Stolen from Blosxom Plugin: breadcrumbs
# Expects full path to document being displayed,
# including document root, or a URL.
#  Relative path will assume starting point at the document root.

sub breadcrumbs {
    use Cwd;
    my ($path, $p, @p);

    # Accept an argument, or use the current directory.
    $_ = shift;
    $_ = getcwd() if $_ eq '.';

    # What divider should I use between path components?
    my $divider = " /\n ";

    # Strip the document root or homepage if either is present.
    my $docroot  = '/doc/html/htdocs';
    my $homepage = 'http://sys7.com';

    s!^$docroot/!!;
    s!^$homepage/!!;

    # Get started, then walk the path.
    push @p, '<a href="/">myhost</a>';
    $path = $_;

    foreach (split /\//, $path) {
        $p .= "/$_";
        push @p, qq{<a href="$p/">$_</a>};
    }

    return join $divider, @p;
}

#---------------------------------------------------------------------
# Store the cache file contents.
# Each array entry is: content-type <TAB> entry.

sub storecache {
    my @a = ();
    my $fh;

    open($fh, "< $cache") || die "$cache: $!\n";
    while (<$fh>) {
        next unless /^file=/;
        chomp;
        m/content=(.*?)[;&]/ && push(@a, "$1\t$_");
    }
    close($fh);
    return (@a);
}

#---------------------------------------------------------------------
# Return file modification time.

sub modtime {
    my $path = shift || die;
    my $mt = (stat($path))[9];
    die unless defined($mt);
    return $mt;
}

#---------------------------------------------------------------------
# Page footer.

sub footer {
    my @v = split(/\s+/, version());
    my $adate = arpadate();

    print <<"EndFooter";
 </table>
 <p><table class="footer"><tr>
 <td>As of: <em>$adate</em></td>
 <td align="right"><em>$v[0] $v[1]</em></td></tr>
 </table>
 </body>
 <!-- AUTO-GENERATED BY $v[0] -->
</html>
EndFooter
}

#---------------------------------------------------------------------
# Page header.  Include environment for debug.

sub header {
    my ($where, $created, $expires, $bc) = @_;

    print <<"EndHeader";
<?xml version="1.0" encoding="iso-8859-1"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
    "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
 <title>Index of $where</title>
 <meta http-equiv="Expires" content="$expires">
 <meta http-equiv="Created" content="$created">
 <style>
 body {
   width: 95%;
   margin-left: 2%;
 }
 table.main {
   border: 1px black;
   width: 100%;
   font-family: monospace;
   line-height: 105%;
 }
 table.main th {
   border-bottom: 1px solid black;
   line-height: 105%;
 }
 table.main td {
   padding-right: 20px;
 }
 table.main td.dark {
   padding-right: 20px;
   background-color: #f0f0f0;
 }
 table.footer {
   width: 100%;
   background-color: #e0eeee;
   font-family: monospace;
 }
 </style>
</head>

<body>
 <h3>$bc</h3>
 <table class="main">
 <tr>
 <th align="left">&nbsp;</th>
 <th align="left">File</th>
 <th align="left">Last modified</th>
 <th align="left">Size</th>
 <th align="left">Description</th>
 </tr>
EndHeader
}

#---------------------------------------------------------------------
# Accept file modification time, return nicely-formatted time.

sub filetime {
    my $mt = shift;
    my ($hour, $mday, $min, $mon, $sec, $str, $year);

    my @names = (
        "Jan", "Feb", "Mar", "Apr", "May", "Jun",
        "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
        );

    ($sec, $min, $hour, $mday, $mon, $year) = localtime($mt);
    $str = sprintf(
        "%2.2d-%s-%4.4d %2.2d:%2.2d",
        $mday, $names[$mon], $year + 1900,
        $hour, $min
        );

    return ($str);
}

#---------------------------------------------------------------------
# Accept content type, return appropriate icon link.

sub icon {
    my ($content) = @_;
    my ($t);

    if ($content eq "text/directory") {
        $t = '<img src="/ftypes/dir.gif" alt="[DIR]">';
    }
    elsif ($content eq "text/html") {
        $t = '<img src="/ftypes/web.gif" alt="[WEB]">';
    }
    elsif ($content eq "text/plain") {
        $t = '<img src="/ftypes/txt.gif" alt="[TXT]">';
    }
    elsif ($content eq "image/jpeg") {
        $t = '<img src="/ftypes/jpeg.gif" alt="[JPG]">';
    }
    elsif ($content eq "image/gif") {
        $t = '<img src="/ftypes/gif.gif" alt="[GIF]">';
    }
    elsif ($content eq "image/png") {
        $t = '<img src="/ftypes/png.gif" alt="[PNG]">';
    }
    elsif ($content eq "audio/wav") {
        $t = '<img src="/ftypes/wav.gif" alt="[WAV]">';
    }
    elsif ($content eq "video/x-ms-wmv") {
        $t = '<img src="/ftypes/wmv.gif" alt="[WMV]">';
    }
    elsif ($content eq "text/x-chdr") {
        $t = '<img src="/ftypes/h.gif" alt="[H]">';
    }
    elsif ($content eq "text/x-csrc") {
        $t = '<img src="/ftypes/c.gif" alt="[C]">';
    }
    elsif ($content eq "application/x-perl") {
        $t = '<img src="/ftypes/pl.gif" alt="[PERL]">';
    }
    else {
        $t = '<img src="/ftypes/unknown.gif" alt="[UNK]">';
    }

    return ($t);
}

#---------------------------------------------------------------------
# Encode spaces and other foolishness in filenames.
# http://www.kluge.net/codesnippets/perl/URLEncode.html

sub urlencode {
    my ($url)        = @_;
    my (@characters) = split(/(\%[0-9a-fA-F]{2})/, $url);

    foreach (@characters) {
        if (/\%[0-9a-fA-F]{2}/)    # Escaped character set ...
        {
            # IF it is in the range of 0x00-0x20 or 0x7f-0xff
            #    or it is one of  "<", ">", """, "#", "%", ";",
            #                     "/", "?", ":", "@", "=" or "&"
            # THEN preserve its encoding

            unless (/(20|7f|[0189a-fA-F][0-9a-fA-F])/i
                || /2[2356fF]|3[a-fA-F]|40/i)
            {
                s/\%([2-7][0-9a-fA-F])/sprintf "%c",hex($1)/e;
            }
        }
        else    # Other stuff: 0x00-0x20, 0x7f-0xff, <, >, #, and "..."
        {
            s/([\000-\040\177-\377\074\076\042\043])
             /sprintf "%%%02x",unpack("C",$1)/egx;
        }
    }
    return join("", @characters);
}
