#!/usr/bin/perl my $rcsid = '$Id: xmltext,v 1.2 2003/08/24 21:59:04 vogelke Exp $'; =head1 NAME xmltext =head1 SYNOPSIS xmltext [-v] file =head1 DESCRIPTION Reads XML files in stream mode with an internal subroutine package. Prints just the text contents. =head1 OPTIONS -v prints the version and exits. =head1 AUTHOR Karl Vogel Sumaria Systems, Inc. =cut #--------------------------------------------------------------------- use strict; use warnings; use subs qw(usage version); use File::Basename; use XML::Parser; $ENV{"PATH"} = "/bin:/usr/bin:/usr/local/bin"; my $myname = basename ($0, ".pl"); my $infile = ''; # # Arguments. # @ARGV or usage("no args"); ARG: while (@ARGV) { $_ = shift @ARGV; /^-v/ and do { my $vers = version(); warn "$vers\n"; exit (0); }; /^-.*/ and do { usage ("unrecognized option: $_"); }; $infile = $_; } # # Real work starts here. # die "$infile: can't read: $!\n" unless -f $infile; my $p = XML::Parser->new( Style => 'Stream', Pkg => 'MySubs' ); $p->parsefile($infile); exit(0); #--------------------------------------------------------------------- # Package for callbacks. All the interesting stuff happens here. { package MySubs; # StartDocument # Called at the start of the parse. sub StartDocument { 1; } # StartTag # Called for every start tag with a second parameter of the # element type. The $_ variable will contain a copy of the # tag and the %_ variable will contain attribute values # supplied for that element. sub StartTag { 1; } # EndTag # Called for every end tag with a second parameter of the # element type. The $_ variable will contain a copy of the # end tag. sub EndTag { 1; } # Text # Called just before start or end tags with accumulated # non-markup text in the $_ variable. sub Text { tr/\n\t/ /; s/^\s+//; s/\s+$//; s/\s\s*/ /g; return unless length; print "$_\n"; } # Default # Anything not covered. sub Default { tr/\n\t/ /; s/^\s+//; s/\s+$//; print "default: [$_]\n"; } # EndDocument # Called at conclusion of the parse. sub EndDocument { 1; } } #--------------------------------------------------------------------- # Print a usage message from the comment header and exit. sub usage { my ($emsg) = @_; require Pod::Usage; import Pod::Usage qw(pod2usage); warn "$emsg\n"; pod2usage(-verbose => 1); } #--------------------------------------------------------------------- # Return the current version. sub version { $_ = $rcsid; s/,v / /; @_ = split; return "$_[1] v$_[2] $_[3] $_[4]"; }