#!/usr/bin/perl
#<ls2htm: write a decent HTML directory table of contents

use Modern::Perl;
use Getopt::Long qw(GetOptions);
use Pod::Usage;
use File::Basename;
use Carp;
use Fcntl ':mode';
use Cwd;

use subs qw/arpadate filetype footer header manpage metatags
  modtime myuuid slurp styling usage version where/;

local $ENV{'PATH'} = join ":", qw(/bin /usr/bin /usr/local/bin);

my $myname = basename($0);
$myname =~ s/\.\w*$//;    # strip any extension

# Command-line options.
my ($opt_help, $opt_manpage, $opt_uuid, $opt_version, $opt_where);

Getopt::Long::config("noignorecase", "bundling");

GetOptions(
    "h|?" => \$opt_help,       # print usage
    "m"   => \$opt_manpage,    # print manpage
    "u"   => \$opt_uuid,       # print UUID
    "v"   => \$opt_version,    # print version
    "w"   => \$opt_where,      # print source location
    ) or usage;

manpage if $opt_manpage;
myuuid  if $opt_uuid;
where   if $opt_where;
usage   if $opt_help;

if ($opt_version) { print version(), "\n"; exit(0); }

# Real work starts here.  Before we do anything else, make sure we have
# something to read.

# If there's something on stdin, use it.
# If a file is passed as an argument, use that.
# Otherwise if an "index.wn" file is present, use it.

my $fh;
my $fn;
my $defindex = 'index.wn';

unless (@ARGV) {    # stdin or default.
    if (-f STDIN || -p STDIN) {
        push @ARGV, '-';
    }
    elsif (-f "$defindex") {
        push @ARGV, "$defindex";
    }
    else {
        croak "need a file or STDIN to read";
    }
}

$fn = shift @ARGV;
if ($fn eq '-') {
    $fh = \*STDIN;
}
else {
    open($fh, '<', $fn) or croak "main open: $fn: can't read: $!";
}

my @months = (
    "Jan", "Feb", "Mar", "Apr", "May", "Jun",
    "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
    );

my $kbyte = 1024;
my $mbyte = $kbyte * $kbyte;

# Where are the icons?
my $icondir = '/icons/';
my $iback   = $icondir . "back.gif";
my $iblank  = $icondir . "blank.gif";
my $ifolder = $icondir . "folder.gif";
my $itext   = $icondir . "text.gif";
my $iunk    = $icondir . "unknown.gif";

# Current directory:
# Account for / and \ on Win32 and non-Win32 systems
my $pwd;
($^O =~ /Win32/) ? ($pwd = getcwd) =~ s/\//\\/g : ($pwd = getcwd);

# HTML header.
header();

# Column headers and parent directory.
my $mt  = (stat(".."))[9];
my $str = modtime($mt);

print <<"EndParent";
    <!-- Contents start here -->
    <table class="ls">
      <colgroup>
        <col class="col1">
        <col class="col2">
        <col class="col3">
        <col class="col4">
        <col class="col5">
      </colgroup>
      <tr>
        <th><img src="/icons/blank.gif" alt=" "></th>
        <th>Name</th>
        <th>Last modified</th>
        <th class="size">Size</th>
        <th>Description</th>
      </tr>
      <tr>
        <td><img src="/icons/back.gif"alt="[DIR]"></td>
        <td><a href="..">Parent Directory</a></td>
        <td>-</td>
        <td class="size">-</td>
        <td></td>
      </tr>
EndParent

# This is the weird part.  "index.wn" files have an odd syntax, and
# we'll probably replace it with JSON sometime soon.

my $bytes;
my $img;
my $mode;
my $size;

# File= and Title= entries.
my ($feq, $teq) = ('', '');

while (<$fh>) {
    chomp;
    next if /^#/;

    if (/File=(.*)/) {
        $feq = $1;
    }
    elsif (/Title=(.*)/) {
        $teq = $1;
    }
    elsif (length($_) == 0) {

        # Already do this line?
        next unless length($feq) + length($teq) > 0;

        # Get file size.
        ($mode, $bytes, $mt) = (stat($feq))[2, 7, 9];

        if ($bytes >= $mbyte) {
            $size = sprintf("%.1f", $bytes / $mbyte) . 'M';
        }
        elsif ($bytes >= $kbyte && $bytes < $mbyte) {
            $size = int($bytes / $kbyte) . 'k';
        }
        else {
            $size = '1k';
        }

        # Get file type.
        $str = modtime($mt);
        $img = filetype($mode);

        # Print the table entry.
        print <<"ENTRY";
      <tr>
        <td>$img</td>
        <td><a href="$feq">$feq</a></td>
        <td>$str</td>
        <td class="size">$size</td>
        <td><em>$teq</em></td>
      </tr>
ENTRY

        $feq = $teq = '';
    }
}

print "    </table>\n";
close($fh) or croak "main close failed: $!";

# HTML footer with version, update times.
footer();
exit(0);

#---------------------------------------------------------------------
# Set additional "<meta>" tags.  I wanted this separate in case you
# don't want to bother with them.

sub metatags {
    my $adate = arpadate();

    return <<"EndMeta";
    <meta name="Author" content="XXX" />
    <meta name="ETag" content="XXX" />
    <meta name="Generator" content="$myname" />
    <meta name="Keywords" content="XXX" />
    <meta name="Last-Modified" content="$adate" />
    <meta name="UUID" content="XXX" />
EndMeta
}

#---------------------------------------------------------------------
# Print header and start of TOC.

sub header {
    my $str;
    my $curdir  = basename($pwd);
    my $rcsinfo = version();
    $rcsinfo =~ s/\$//g;

    if    ($str = slurp("HEADER"))      { $str = "\n<pre>$str</pre>\n"; }
    elsif ($str = slurp("HEADER.txt"))  { $str = "\n<pre>$str</pre>\n"; }
    elsif ($str = slurp("HEADER.htm"))  { 1; }
    elsif ($str = slurp("HEADER.html")) { 1; }
    elsif ($str = slurp("README"))      { $str = "\n<pre>$str</pre>\n"; }
    elsif ($str = slurp("README.txt"))  { $str = "\n<pre>$str</pre>\n"; }
    elsif ($str = slurp("README.htm"))  { 1; }
    elsif ($str = slurp("README.html")) { 1; }
    else                                { $str = "<h2>Index of $pwd</h2>"; }

    # If there's a stylesheet present, use it.
    # Otherwise generate a decent alternative.
    my $css = styling();

    # If you want additional meta tags like "Author", etc...
    my $mtags = metatags();

    print <<"EndHeader";
<!DOCTYPE html>
<!-- Generated by $myname -->
<html lang="en">
  <head>
    <meta charset="utf-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
$mtags
    <title>Index of $curdir</title>
$css
  </head>
  <body>
    <h2>Index of $curdir</h2>
    $str
EndHeader
}

#---------------------------------------------------------------------
# Print link to CSS file or a decent substitute.

sub styling {
    if (-f "style.css") {
        return <<"EOCFILE";
    <link rel="stylesheet" href="style.css" type="text/css" media="screen">
EOCFILE
    }
    else {
        return <<"EOCSS";
    <style type="text/css">
    body {
      margin:           0 15% 0 5%;
      padding:          0 0 1em 0;
      font-family:      "Bitstream Vera Sans", sans-serif;
      font-size:        14pt;  /* might be a little small... */
      line-height:      110%;
    }
    ul li {
      padding:          0;
    }
    pre {
      font-family:      "Bitstream Vera Sans", monospace;
      font-size:        100%;
      line-height:      100%;
      white-space:      pre;
      border:           1px solid black;
      background-color: #eee;
      padding:          1em 1em;
    }

    div.toc {
      font-family:      "Bitstream Vera Sans", monospace;
      font-size:        95%;
      background-color: #fff;
      border:           none;
      margin-right:     -5em;
      padding:          0;
      line-height:      100%;
      white-space:      pre;
    }

    /* Footer styles */
    table.footer {
      width:            100%;
      font-size:        95%;
      color:            black;
      background-color: #ccffcc;
      margin-top:       1em;
      margin-left:      0;
      margin-right:     0;
    }
    td.fl {             /* left side */
      width:            30%;
      font-style:       italic;
      text-align:       left;
      padding-left:     10px;
    }
    td.fc {             /* center */
      font-style:       italic;
      text-align:       center;
    }
    td.fr {             /* right side */
      width:            40%;
      font-style:       italic;
      text-align:       right;
      padding-right:    10px;
    }

    /* File-list styles */

    .col1 { width: 1%; }
    .col2 { width: 20%; }
    .col3 { width: 20%; }
    .col4 { width: 1%; }
    .col5 { width: 58%; }
    .size { text-align: right; padding-right: 1em; }

    table.ls {
      text-align:       left;
      font:             "Bitstream Vera Sans Mono", monospace;
      font-size:        90%;
      padding:          0 10px 0 0;
      width:            100%;
      margin:           5px;
      padding:          0;
      border-top:       1px solid black;
      border-bottom:    1px solid black;
      border-collapse:  separate;
      border-spacing:   1px;
    }

    h1, h2, h3, h4, h5, h6 { margin-left: -2em; text-align: left }
    /* background should be transparent, but WebTV has a bug */
    h1, h2, h3 { color: #005A9C; background: white }
    h1         { font: 170% sans-serif }
    h2         { font: 140% sans-serif }
    h3         { font: 120% sans-serif }
    h4         { font: bold 100% sans-serif }
    h5         { font: italic 100% sans-serif }
    h6         { font: small-caps 100% sans-serif }
    </style>
EOCSS
    }
}

#---------------------------------------------------------------------
# Print page footer.
# FIXME: check for </body> and </html> in footer files.

sub footer {
    my $adate = arpadate();
    my $str   = '';
    my $v     = version();

    if    ($str = slurp("FOOTER"))      { $str = "\n<pre>$str</pre>\n"; }
    elsif ($str = slurp("FOOTER.txt"))  { $str = "\n<pre>$str</pre>\n"; }
    elsif ($str = slurp("FOOTER.htm"))  { 1; }
    elsif ($str = slurp("FOOTER.html")) { 1; }

    print "$str" if length($str);

    print <<"EndFooter";
    <!-- Footer starts here -->
    <table class="footer">
      <tr>
        <td class="fl">$v</td>
        <td class="fr">$adate</td>
      </tr>
    </table>
  </body>
</html>
EndFooter
}

#---------------------------------------------------------------------
# Accept an optional date in total seconds since the epoch,
# and write it in ARPA-mail standard form:
#       Fri, 28 Mar 1997 15:40:57 -0500

sub arpadate {
    use POSIX qw(strftime);
    my $t = shift || time();
    return strftime("%a, %d %b %Y %T %z", localtime($t));
}

#---------------------------------------------------------------------
# Accept path, return modification time.

sub modtime {
    my ($mt) = @_;
    my ($hour, $mday, $min, $mon, $sec, $str, $year);

    ($sec, $min, $hour, $mday, $mon, $year) = localtime($mt);
    $str = sprintf(
        "%2.2d-%s-%4.4d %2.2d:%2.2d",
        $mday, $months[$mon], $year + 1900,
        $hour, $min
        );

    return ($str);
}

#---------------------------------------------------------------------
# Accept mode, return filetype indicator.

sub filetype {
    my ($mode) = @_;
    my $t;

    if (S_ISDIR($mode)) {
        $t = '<img src="' . $ifolder . '" alt="[DIR]" />';
    }
    elsif (S_ISREG($mode)) {
        $t = '<img src="' . $itext . '" alt="[TXT]" />';
    }
    else {
        $t = '<img src="' . $iunk . '" alt="[UNK]" />';
    }

    return ($t);
}

#---------------------------------------------------------------------
# Grab an entire file.

sub slurp {
    my ($file) = @_;
    my $str = undef;

    if (open(my $fh, '<', $file)) {
        local $/;
        $str = <$fh>;
        close($fh);
    }
    return $str;
}

#---------------------------------------------------------------------
# Print a usage message from the comments and exit.

sub usage {
    my ($emsg) = @_;
    use Pod::Usage qw(pod2usage);
    carp "$emsg\n" if defined $emsg;
    pod2usage(-verbose => 99, -sections => "NAME|SYNOPSIS|OPTIONS");
    return;
}

sub manpage {
    my @args = ("perldoc", "$0");
    exec {$args[0]} @args;    # safe even with one-arg list
    die "manpage: should not get here\n";
}

#---------------------------------------------------------------------
# Print the UUID, current version, or source location.

sub myuuid {
    my $UUID = (qw$UUID: bb5cbfb9-33cc-303f-9386-2c79a3919b24 $)[-1];
    print "$UUID\n";
    exit(0);
}

sub version {
    use version; our $VERSION = qv((qw$Revision: 4.7 $)[-1]);
    my $DATE = join(" ", (qw$Date: 2025-12-13 05:20:20-05 $)[-2, -1]);
    return "$myname $VERSION $DATE";
}

sub where {
    no warnings 'qw';
    my $SOURCE =
      (qw$Source: /home/vogelke/projects/html-dir/RCS/ls2htm,v $)[-1];
    my $HOST = (qw$Host: furbag.my.domain $)[-1];
    print "file://$HOST", "$SOURCE\n";
    exit(0);
}

#---------------------------------------------------------------------
__END__

=head1 NAME

ls2htm - write a decent HTML directory table of contents

=head1 SYNOPSIS

ls2htm [-hmuvw] [file ...]

=head1 OPTIONS

=over 4

=item B<-h>

Print a brief help message and exit.

=item B<-m>

Print the manual page and exit.

=item B<-u>

Print the script UUID and exit.

=item B<-v>

Print the version and exit.

=item B<-w>

Print the canonical source location and exit.

=back

=head1 DESCRIPTION

B<ls2htm> reads an "index.wn" file generated by B<wndex> (part of the
very OLD B<wn> webserver code), and writes a decent display and table of
contents similar to the autoindex generated by Apache.

An B<index.wn> file looks like this (without the line numbers):

  1  File=ls2htm
  2  Content-Type=text/x-perl; charset=us-ascii
  3  Title=Script to write a decent HTML directory table of contents
  4  
  5  File=wanted.htm
  6  Content-Type=text/html; charset=us-ascii
  7  Title=Sample page generated by ls2htm -- Sanoid time format tweaks
  8  

Entries are separated by blank lines, and the file must end with one or
the last entry won't be read.  This format was chosen because the first
version of B<ls2htm> was written in 2001; we did have easier formats like
JSON and YAML, but utilities to conveniently handle them weren't common.

You don't need the B<wn> utilities to generate this file, but they do some
useful things like extract HTML titles, etc.  Of course, this is nothing
that you can't do with Perl.

=head1 METHOD

If your directory holds

  optional HEADER (or HEADER.{htm|html|txt})
  f1.txt
  f2.c
  optional README (or README.{htm|html|txt})

and the index.wn file holds

  File=f1.txt
  Content-Type=text/plain; charset=us-ascii
  Title=Some neat text file

  File=f2.c
  Content-Type=text/x-c; charset=us-ascii
  Title=Equally nifty C program

then the generated index.htm would hold

  Title
  Included HEADER

  File display:
    icon  filename  modtime  size  description-if-any
    DIR   ..        -        -     Parent directory
    TXT   f1.txt    ...            Some neat text file
    C     f2.c      ...            Equally nifty C program

  Included README
  Footer with last-modified date, generated-by, etc

=head1 AUTHOR

 Karl Vogel <vogelke+unix@pobox.com>

=cut
