#!/usr/bin/perl # # $Id: pgnews,v 1.8 2002/08/24 22:02:49 vogelke Exp $ # # # SOURCE NAME | pgnews, (Perl Get News) # | # SYNOPSIS | pgnews [-h hostname] # | # DESCRIPTION | pgnews goes to a specified NNTP server # | and retrieves news articles by newsgroup # | and saves them to a specified file in # | mailbox format. # | Please see the NOTES section. # | # CHANGES | Programmer: Date: Reason/Comments # | Jeffrey B. McGough 09-05-91 initial # | Jeffrey B. McGough 09-06-91 Added select (see FIXES) # | Jeffrey B. McGough 10-06-91 Fixed erronious end of article # | Karl Vogel 12-01-91 Additions for Emacs mail # | Jeffrey B. McGough 07-09-92 Fixed dup article bug # | Jeffrey B. McGough 07-13-92 (See FIXES) # | Jeffrey B. McGough 07-27-92 VERSION 2.0 (See FIXES) # | # NOTES | Pgnews needs a file named .pgnews to read # | its newsgroup, last message, and savefile from. # | .pgnews format is: # | newsgroup number savefile # | Example: # | comp.unix.wizards 7800 cuw # | comp.unix.shell 3203 cus # | comp.unix.questions 546 cuq # | # | comp.unix.wizards will be saved to file cuw in # | mailbox format starting at article 7800 etc. # | # FIXES | 09-06-91: added select on S to keep the client # | from getting out of sync. # | Jeffrey B. McGough mcgough@wrdis01.af.mil # | # | 10-06-91: Fixed an overlooked END of ARTICLE # | bug... Thanks to a member(s) of the issos # | group at Ohio State. # | Jeffrey B. McGough mcgough@wrdis01.af.mil # | # | 07-09-92: Fixed a duplicate article bug # | pointed out to me by kenr@bridge.cc.rochester.edu # | and gort@bridge.cc.rochester.edu. Thanks # | for the help with the fix. # | Jeffrey B. McGough mcgough@wrdis01.af.mil # | # | 07-13-92: Added code to take a -h option # | for a host to use as a server... # | Thanks to Barry Hassler... # | Added code written by sherman@unx.sas.com # | to make the header From_ line more RFC976 # | compatable for the ELM mailer... # | Jeffrey B. McGough mcgough@wrdis01.af.mil # | # | 07-27-92: Added 15 minute timeout to the # | select stuff. Just in case the server goes # | away we won't sit around forever listening # | to a dead connection... # | Exception to the time out: # | While in the loop where we slurp up the # | article from the server, the select will not # | work... # | A friend of mine (Barry Hassler) seems to think # | that at that point Perl has already sucked # | the whole thing into its own buffers... # | Care to comment Larry? # | Anyway I commented out that select. If anyone # | has any ideas please let me know... # | Went ahead and built VERSION 2.0 # | Jeffrey B. McGough mcgough@wrdis01.af.mil # | # require 'sys/socket.ph'; # The way I coded the sockets is this necessary? $VERSION = '2.0'; $port = 119; # For NNTP # HOSTNAME for the server... #$host = 'localhost'; $host = 'blackbird.afit.af.mil'; # Pack format... $sockaddr = 'S n a4 x8'; $DOMAIN = &AF_INET; $STYLE = &SOCK_STREAM; while ($arg = shift(@ARGV)) { if ($arg =~ /-.*h/) { $host=shift(@ARGV); if ($host eq "") { printf ("Need host name after -h\n"); exit 1; } next; } printf "Unknown option: '%s'\n", $arg; exit 1; } $newsfile = '.pgnews'; $nnewsfile = '.pgnews.new'; $rin = $rout = ''; ($name, $aliases, $proto) = getprotobyname('tcp'); ($name, $aliases, $type, $len, $hostaddr) = gethostbyname($host); $sock = pack($sockaddr, $DOMAIN, $port, $hostaddr); socket(S, $DOMAIN, $STYLE, $proto) || die $!; connect(S, $sock) || die $!; select(S); $| = 1; select(STDOUT); #set up for select vec($rin, fileno(S), 1) = 1; #this select will block until the server gives us something. $nfound = select($rout=$rin, undef, undef, 900); if ($nfound == 0) { print "Socket timed out..."; exit 1; } $_ = ; #Read one line to see if we got a good connection. if ($_ !~ /^2../) { print; die "Service unavailable"; } open(GRP, "< $newsfile") || die "Could not open $newsfile: $!"; open(NGRP, "> $nnewsfile") || die "Could not open $nnewsfile: $!"; # # If we get this far, see if any post-processing is to be done for # each newsgroup. Look for a program called "pgpost" in the user's # search path, and store it in the variable "$post" if found. Assume # that the post-processor accepts one argument on the command line; # the newsfile we just read. # @path = split (/:/, $ENV{'PATH'}); foreach $dir (@path) { $post = $dir . "/pgpost"; last if -x $post; undef ($post); } select(NGRP); $| = 1; select(STDOUT); while() { chop; ($grp, $lgot, $file) = split; print(S "group $grp\n"); #this select will block until the server gives us something. $nfound = select($rout=$rin, undef, undef, 900); if ($nfound == 0) { print "Socket timed out..."; exit 1; } $_ = ; #Make sure the group change worked... ($stat, $num, $first, $last) = split; if( $stat !~ /^2../ ) { print; warn "Bad group: $grp"; print(NGRP "$grp $lgot $file\n"); next; } # good group open output file... open(OUTFILE, ">>$file") || die "Could not open $file: $!"; if ( $first > $lgot ) { $lgot = $first; } if ( $lgot <= $last ) { foreach $art ($lgot..$last) { print(S "article $art\n"); # this select will block until the server gives # us something. $nfound = select($rout=$rin, undef, undef, 900); if ($nfound == 0) { print "Socket timed out..."; exit 1; } $_ = ; #get error if one exists # Only print errors if they are more serious # than a bad article number. if($_ !~ /^2../) { if($_ !~ /423 Bad article number/) { print; } } else { # We now slurp the whole article into the array article... # HMMM is this good or bad... # It gives me the WILLIES [:^) Jeffrey B. McGough @article = (); do { $lgot = $art; $_ = ; s/\r//; if( $_ ne ".\n") { s/^\.//; push(@article,$_); s/^\./../; } else { push(@article,"\n"); } } until $_ eq ".\n"; # replace the Path: with a from line splice(@article, 0, 1, &from_line(@article)); print OUTFILE @article; undef (@article); } } } else { $lgot -= 1; } close(OUTFILE); $lgot += 1; print(NGRP "$grp $lgot $file\n"); # # Handle any post-processing required by the user. # if (defined ($post)) { system ("$post $file"); } } # # Save some old versions of the .pgnews file, in case one gets munged. # close(NGRP); close(GRP); print( S "quit\n"); close(S); for ($k = 7; $k > 1; --$k) { $km1 = $k - 1; rename (".pgnews.$km1", ".pgnews.$k"); } rename (".pgnews", ".pgnews.1"); rename ("$nnewsfile", "$newsfile"); # We parse through @article to build a more proper From_ line sub from_line { local(@article) = @_; local($header) = $true; # we are in the header of the mail local($date,$month,$year,$time,$day); for (@article) { if ($header == $true) { if (/^Path: ([^ \n]+)/) { $path = $1; } elsif (/^Date: /) { if (/^Date: (\d*) (\D*) (\d*) (\S*)/) { $date = $1; $month = $2; $year = $3; $time = $4; } elsif (/^Date: (\D*), (\d*) (\D*) (\d*) (\S*)/) { $day = $1; $date = $2; $month = $3; $year = $4; $time = $5; } # convert 2 digit year to 4 $year =~ s/^([0-9])([0-9])$/19$1$2/; if ($day eq "") { $day = &day_of_week($month,$date,$year); } } $header = $false if /^\n$/; } } $from_line = sprintf("From %s %s %s %2s %s %s %s\n", $path, $day, $month, $date, $time, $year); return($from_line); } # This gives us the day of week from the date... sub day_of_week { local($month,$date,$year) = @_; local($day); if ($month <= 2) { $month += 12; $year--; } $day = ($date + $month * 2 + int(($month + 1) * 6 / 10) + $year + int($year / 4) - int($year / 100) + int($year / 400) + 2) % 7; if ($day == 0) { $day = 7; } return (NULL, Sun, Mon, Tue, Wed, Thu, Fri, Sat, Sun)[$day]; } exit (0);