$title

#!/usr/bin/perl # # @(#) Perl -- Simple text2html converter. Uses Technical text format (TF) # @(#) $Id: t2html.pls,v 1.55 1998/04/08 16:26:36 jaalto Exp $ # # {{{ Documentation # # File id # # .Copyright (C) 1996-98 Jari Aalto # .Created: 1996-11 # .$Contactid: $ # .$URL: ftp://cs.uta.fi/pub/ssjaaa/ssjaaa.html $ # .$Keywords: Perl txt html conversion $ # .$PerlVer: 5.001 $ # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License as # published by the Free Software Foundation; either version 2 of # the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License along # with this program; if not, write to the Free Software Foundation, # Inc., 675 Mass Ave, Cambridge, MA 02139, USA. # # Perl versions supported # # This program should work with any version of perl 5, but the recommended # version is 5.004 or newer. You also need the LWP modules if you're # going to use the 'link check' option. This program was originally # written for Perl4, so don't wonder why there are local() definitions. # # This program was originally written for perl4, so excuse some # oldish perl keyword usage, like `local'. # # About program layout # # The {{{ }}} marks you see in this file are part of the file "fold" # control package called folding.el (Unix Emacs lisp package). # ftp://ftp.csd.uu.se/pub/users/andersl/beta/ to get the latest. # # There are also lines that look like # ....... &tag ... and they # are generated by Emacs Lisp package tinybm.el, which is also # a document structure tool. You can jump between the blocks with # Ctrl-up and Ctrl-down keys and create those "bookmarks" with # Emacs M-x tibm-insert. 
See mentioned URL cs.uta.fi. # # Funny identifiers at the top of file # # The GNU RCS ident(1) program can print useful information out # of all variables that are in format $ IDENTIFIER: text $ # See also Unix man pages for command what(1) which outputs all lines # matching @( # ). Try commands: # # % what PRGNAME # % ident PRGNAME # # INTRODUCTION # # Please start this perl script with options # # -help to get the help page # -help-sh To print the sample shell script to steer # this perl file (front-end) # # WWW CONTACT SITE # # See http://www.netforward.com/poboxes/?jari.aalto and navigate # to html pages in the site to get more information about me # and my tools (Emacs, Perl, procmail mostly) # # DESCRIPTION # # This perl program converts text files that are written in rigid # (T)echnical layout (f)ormat (which is explained when you run -help) # to html pages very easily and effectively. # # If you plan to make any text files available in HTML format you will # find this program very useful. If you want to have fancy # graphics or more personal page layout, then this program is not for # you. # # I have also made a package that helps you to write and format text # files to Technical format. Please see the following Emacs package at # the previously mentioned URL site. # # tinytfo.el # # End of documentation # # Change Log: # .......................................................... &RcsLog ... # # $Log: t2html.pls,v $ # Revision 1.55 1998/04/08 16:26:36 jaalto # - commented out

code adding; because it seemed to # create double empty line. Netscape display bug? # # Revision 1.54 1998/03/26 10:41:27 jaalto # - small changes # # Revision 1.53 1998/03/23 08:10:42 jaalto # - There was sometimes missing

code between # paragraphs. Fixed. # # Revision 1.52 1998/03/15 09:35:39 jaalto # - The ** and __ were not expanded inside bullets. # This was due to previous version's change: no expansion # beyond column 12. Now checks !$AsIs, which fixes the problem. # # Revision 1.51 1998/03/04 11:49:10 jaalto # - Special marking is now disabled after column 12. # Eg perl end of script marker __END__ is now correct. # # Revision 1.50 1998/02/23 08:25:55 jaalto # - Fixed the email and url A-HREF handling. There was an error # when deciding which url was clickable and which not. # # Revision 1.49 1998/02/10 15:02:13 jaalto # - turning option -lchk mistakenly removed TOC. The reported line numbers # didn't match the original text any more. Fixed. # # Revision 1.48 1998/02/06 09:34:08 jaalto # - Switch -md didn't work. Fixed. # - Bug in url handling fixed. Ate text after url. # - LINK tag is not handled okay by any other than Lynx. # # Revision 1.47 1997/10/16 19:22:47 jaalto # - Turned off The LINK tags, because Netscape couldn't handle them # and with lynx you would see double prev-top-next buttons. # # Revision 1.46 1997/10/13 23:08:59 jaalto # - Documentation changes only # # Revision 1.45 1997/10/13 19:06:38 jaalto # - The tar|gz link check adjusted # - Mysteriously added newlines to the beg of html doc: fixed. # - Killing invalid characters from header NAME tag ref is now better. # - KillToc is now always called. # # Revision 1.44 1997/10/11 14:40:01 jaalto # - Changed UpdateHeaderArray. Now deletes unnecessary characters # before constructing NAME reference. Better word recognition # - Corrected error in "URL and MAIL made clickable". The words after # last url vanished. Now ok. # # Revision 1.43 1997/10/06 18:03:26 jaalto # - Added link Check feature # # Revision 1.42 1997/10/05 19:24:52 jaalto # *** empty log message *** # # Revision 1.41 1997/10/03 10:31:41 jaalto # - Corrected the (-) control code for -http:// references. # - Added the hr tag before each heading 0. 
# - added more email suppression rules, like < noClick@this.i>. There must # be no surrounding whitespace. # - Common words used in URL now automatically skips the url: # foo,bar,quu # # Revision 1.40 1997/09/25 21:14:33 jaalto # - added option -simple and -quiet. Changed ## to !! # # Revision 1.39 1997/09/19 13:32:07 jaalto # - Added GNU Gen. Pub. licence # # Revision 1.38 1997/09/16 20:04:40 jaalto # - Small text changes # # Revision 1.37 1997/09/16 15:45:39 jaalto # - Only documentation changes # # Revision 1.36 1997/08/28 06:56:12 jaalto # - Added -lchk option for Perl5; but it isnot yet functional. # # Revision 1.35 1997/08/14 22:06:06 jaalto # - New feature: if there is heading "Table of contents" it is automatically # filtered out. Program itself creates TOC; so having file's toc # make's no sense. The Program's toc items are clickable. # # Revision 1.34 1997/08/14 17:33:03 jaalto # - only Help() function text adjusted. # # Revision 1.33 1997/08/12 14:43:20 jaalto # - Now it can read Heading that have plus(+) character; like in # remailer options +signsend/-signsend # # Revision 1.32 1997/08/12 14:29:11 jaalto # - Now program skips these invalid email addresses and does not make # them clickable. # References: <5dfqlm$m50@basement.replay.com> # Message-ID: <5dko56$1lv$1@news02.deltanet.com> # # Revision 1.31 1997/08/12 10:50:02 jaalto # - Corrected "mailto" reference at the end of html doc; where Contact # address were placed. # # Revision 1.30 1997/06/08 19:50:13 jaalto # - Mailto: if there was the rest of the line after email was # deleted. Bug, corrected # - Very serious error corrected: if there were multiple links # in the same line, only the last line was found. I used the # .* operator which missed the links before text. Corrected. # # Revision 1.29 1997/05/29 19:46:09 jaalto # - Added initialize function, CONTACT and URL; Reformatted beginning # comments in the file # - Corrected mysterious "duplication of words" after http or ftp link. 
# - Added optio -v to print version number. # # Revision 1.28 1997/04/01 08:54:48 jaalto # - Small fixes. The 1.1 header name eating didn}t work right. It ate # any first number. now corrected. # - Added new option -e, where you can specify EMAIL address of the # document. Environmental variable can now be overruled. # # Revision 1.27 1997/03/28 23:20:04 jaalto # - Added new option -name-nbr. Also now handles numbered heading so that # the NAME reference doesn't contain the number unless this new option # is used. # - Renamed option -uniq to -name-uniq # - Added support for Emacs style American english quotes `'. In emacs # this refer to used symbols, like variables or functions. # - Fixed header reading, now included ['] charcters too. # # Revision 1.26 1997/03/25 11:28:39 jaalto # - Added -help-sh switch and included the t2h csh script to the end # of perl file. # - Wrote introduction + reformatted the comments at top of file # - Corrected '-' suppression TAG in front of email and http # references. Previsouly those refs were still highlighted, while # they should have been unclickable. # # Revision 1.25 1997/02/18 11:55:59 jaalto # - If there was multiple email addresses on theline; only last one # was marked. Added while loop to handle every email in the line. # - Correctd *_= markup handling. The line looped 3 time ONLY # if it has markup. # - Now requires 2 lines of input instead of 5. Otherwise program dies. # # Revision 1.24 1997/02/15 22:33:24 jaalto # - missing { cause fatal perl exec error # # Revision 1.23 1997/02/15 22:08:21 jaalto # - Corrected nested *=_ markup handling. # - corrected #REF handling. Doesn't require spaces any more. # # Revision 1.22 1997/02/14 19:12:24 jaalto # - small text change # # Revision 1.21 1997/02/03 16:20:14 jaalto # - small changes, the '==' mistankely got replaced with , # now # - The 'Ducument author' line now has code. 
# # Revision 1.20 1997/02/03 07:40:48 jaalto # - Added '==' sample code marking for individual words # # Revision 1.19 1997/02/02 13:59:45 jaalto # - Added -uniq option to handle non-resolvable NAME references # that are constructed from header names. Now also dies # if not all NAME refs are unique and suggests turning on # -uniq option. # # Revision 1.18 1996/12/17 18:06:48 jaalto # - bullet P-code continuing corrected. # # Revision 1.17 1996/12/12 17:20:07 jaalto # - The endind ADDRESS statement was not separated by HR code. # Now corrected. # # Revision 1.16 1996/12/12 09:14:39 jaalto # - Fixed couple of small bugs, Eg. " code handling at column 7. # # Revision 1.15 1996/12/02 14:46:37 jaalto # - Changed $RCS_ID's double quotes to single quotes to prevent problems # in Perl 5 (interpolation of $) # - Added die to the main if file couldn't be opened. # # Revision 1.14 1996/12/02 14:22:44 jaalto # - Added expand tabs function # # Revision 1.13 1996/11/24 15:47:42 jaalto # - Added new #REF command to reference inside document # easily. # # Revision 1.12 1996/11/23 23:16:11 jaalto # - The whole structure of program has been reorganized into categories. # - The code changed so that no function prints nothing, but returns # array of html strings. # - Added options -del -delfld -o and added internal -db debugging # - Started writing and designing #SPLIT to several html pages. # This feature is not yet implemented. # - Clarified the usage function better. # # Revision 1.11 1996/11/19 22:24:02 jaalto # - Added LINK tags, Rewrote Button code. # - Now there is P code for continuing the bullet in next paragraphs. # - added: IsEmptyText, makeLinkHtml # - Now doesn't use FONT SIZE command any more, but SMALL which # is suggested by standard. # # Revision 1.10 1996/11/18 13:47:04 jaalto # - The index structure vilated SGML, it printed

text # when it should have printed

text. Corrected. # - The #URL-BASE was not expanded at bullet start line (o). Now fixed. # # Revision 1.9 1996/11/18 09:16:41 jaalto # - Added "-" switch to inhibit http or mail address to be made # clickable. This is useful in examples and in references that are not # real or which shouldn't be used. # - Some Perl 5 errors, like escaping \@ corrected. # - New function XlatHtml which can take care of some common special # characters and turn them into html codes (like > ) # - EM ** and STRONG __ words now require leading space. # # Revision 1.8 1996/11/17 11:16:09 jaalto # - Added numbered list support with "." bullet # - Added META tags for search engines to the start of html page, # switches: -mk -md # - Added -pref tag to print out built NAME references # - The html creation date is now in ISO format and not just `date` # # Revision 1.7 1996/11/16 20:30:19 jaalto # - Added "," code for _not_ inserting P line if above line is empty # - Corrected minor bugs and clarified the -help function to show # all text manipulation. # # Revision 1.6 1996/11/15 23:00:40 jaalto # - Still small bugs, eg it doubled the
lines. now the P is not # added if there is PRE defined, because PRE does already P. # - Some more minor fixes. # # Revision 1.5 1996/11/15 21:05:42 jaalto # - This program has changed totally. You don't even want to see version # 1.3... The previous one shoved text in plain PRE, but this really can # format it into html. # - Bulletins are also now supported and the help page should document # all trick to write tight standard .txt file where you can generate html # # Revision 1.4 1996/11/14 15:40:12 jaalto # - fixed url parsing: it href'd the surrounding tabs # - added BASE and BUT command line args # # Revision 1.3 1996/11/13 23:36:55 jaalto # - Minor corrections # # Revision 1.2 1996/11/13 23:32:58 jaalto # - Perl 5 corrections # # Revision 1.1 1996/11/13 23:25:43 jaalto # Initial revision # }}} # {{{ Initial setup # ----------------------------------------------------------- &setup --- sub Initialize { # DESCRIPTION # # Set global variables unshift(@INC, split(' ',$ENV{'MYPERLLIB'})) if defined $ENV{'MYPERLLIB'} ; # My private library, not needed unless debugging # require 'libmisc.pl' if $ENV{'USER'} eq "jaalto"; $prgname = "t2html.pls"; $lib = $prgname; $| = 1; $RCS_ID = '$Id: t2html.pls,v 1.55 1998/04/08 16:26:36 jaalto Exp $'; $VERSION = (split (' ', $RCS_ID))[2]; if ($RCS_ID && $VERSION) {} # perl -w silencer, No-op $CONTACT = ""; $URL = "ftp://cs.uta.fi/pub/ssjaaa/ssjaaa.html"; if ($CONTACT && $URL) {} # perl -w silencer, No-op @HEADERS = (); } # }}} # {{{ usage/help # ----------------------------------------------------------- &usage --- sub usage { local( $msg ) = @_; # reason why are we here... # We must print to stderr because program is usually used # as pipe 'PRG txt.file > out.html' and the error would not # be seen otherwise. # PRINT: { print STDERR < Do not make this email address clickable bar\@site.com, because it is only an example and not a real address. Notice that it was not surrounded by <>. 
Common login names like foo, bar, quux are also ignored automatically. Also do not make < this\@site.com> because there is extra white spaces. This may be more convenient way to disable email for mouse click. Heading level 1 again at colum 0 Subheading, colum 4 And regular text, column 8 txt txt txt txt txt txt txt txt txt txt txt txt txt txt txt txt txt txt txt txt txt txt txt txt txt txt txt txt txt txt txt txt txt txt txt txt --//-- decription end That's it, there is the whole layout described above. More formally the the rules of text formatting are secribed below- About headings o There are only _two_ heading levels in this style. Heading columns are 0 and 4 and the heading must start with big letter or number o In column 4, if the text starts with small letter, that line is interpreted as o The heading level 1 uses
code to mark big sections. o The headings are gathered and the index jump block is built. The NAME reference consists of first 4 sequential words from the heading name. Make sure your heading are uniquely named, otherwise there will be same NAME references in the generated html. Spaces are converted into underscore when joining the words. About text placement General o Text at column 0 is undefined if it doesn't start with big letter or number to indicate Heading level 1. o Text between colums 1-3 is marked with o Column 4 is reserved for heading level 2 o Text between colums 5-7 is marked with o Text in column 7 is if the first character is double quote. o Column 10 is reserved for text. If yuu want to quote someone's words or reference text, place the text in this column. o Text in colums 9,11 are marked with Column 8 for text and special codes o Column 8 is reserved for normal text Column 12 is special o Column 12 is treated specially: block is started with
and lines are marked as . Wwhen the last text at _column_ 12 is found, the block is closed with
Note follwing example txt txt txt ;evenly started block, fine, do it like this txt txt txt txt txt txt ;No! can't terminate the /pre, because last txt txt txt txt ;column is not at 12 txt txt txt txt Other text markings o If there is dot-code, '.', and immediately non-whitespace, then
code is added to the end of line. .This text contains BR code While these two line are joined together in normal html document. o If there is ',' then the
code is not inserted if the previous line is empty. If you use both '.' and ',' they must be in order '.,' The P-comma-code works differently if it is used in bullet o Special text markings: _this_ is intepreted as this *this* is intepreted as this =this= is intepreted as this `this' is intepreted as this Special #-commands o #REF command is used for refering to NAME inside current document. The whole command must be placed on one single line, you can't break the line. Example: #REF how_to_profile ;(Note: profiling); (1) (2) 1. The NAME reference in current document, a single word. This can also be full http url link. You can get NAME list by enabling -pref option. 2. The clickable text delimited by ; characters. o #URL-BASE is substituted with the command line -base URL reference: only directory part is used from the -base. It allows you to refer to documents local to current site. base = http://this.com/dir1/dir2/text.html #URL-BASE/next.html --> http://this.com/dir1/dir2/next.html o A !! in text column adds one HR code. Any text after tag in the same line is written with STROM EM and inserted just after hr code. Therefore the text word commands have no effect, unless you use CODE markup. Http and email highlighting control o All http and ftp references as well as email addeses are marked clickable. Email must have surrounding <> characters to be recognized. o If you don't want to make some reference clickable, preceed it o If url contains character $, it is not made clickable is clickable < me\@here.com> is not clickable me\@here.com is not clickable <5dko56\$1\@news02.deltanet.com> is Message id, not clickable http://this.com is clickable http://foo.com is not clickable -http://this.com is not clickable http://\$EXAMPLE is not clickable Lists and bullets o The bulletin table is contructed if there is 'o' at column 8 and 3 spaces after it, so that text starts at column 12. Bulleted lined must be kept together, no spaces between bullet blocks. 
o The ordered list is started with '.' and contructed like bullet. About line breaks o All line breaks are visible in your document, do not use more tan one line break to separate text from ewach other. o Very important is that there is only _one_ line break after headers 1 and 2 SPECIAL NOTE If there is heaading 1, whose name is "Table of contents", then that heading and all text up to next heading are discarded from the generated html file. This is done because program itself generates the TOC and jump blocks and NAME references. WHEN DOCUMENT DOESN'T FORMAT RIGHT The second most common error is wrong spacing. Remember, Keeep _one_ empty line between headers and text, between body text and bullet. Check those first. Third common error is that you have put text on wrong column. Remember that text column is at 8. Also, _check_ that the block ends evenly, expecially when it's inside columns 1-7 or 12. Headings start with _big_ letter. USAGE $prgname [opt] textfile -a 'M. Foo' Author of document -del REGEXP Delete lines matching perl regexp. Eg. if you use Unix/Emacs folding.el you can put text into folders # {{{ and # }}}. You don't want to show those marks in the generated html document. -delfld This is special option for deleting Emacs pacakge folding.el's marks as described in the beginning of the perl file. All lines that have {{{ or }}} are discarded. -doc file.txt Document file name, the original text file. -e EMAIL The contact address eg. foo\@site.com -butt URL Button to go to top level document if URL is 'none', then no button is not inserted -butp URL Button to go to previous document, or 'none' -butn URL Button to go to next document, or 'none' -base URL Url location of the html file in destination site where the html will be put. IT IS VERY IMPORTANT THAT YOU SPECIFY THIS. ALL #TAG REFERENCES WITHOUT QUALIFIER URL REFER TO THE URL WHERE base points. -lchk Activate 'link check'. All http and ftp links are checked for validity. 
Problematic links are outputted to stderr. This is available only in if you have the Perl5 LWP web library. Also turn on -quiet, if you are only interested in failed links. Links that are big, eg. tar.gz or that run programs(has ? character) are ignored because the GET request used in checking would return whole link content. -lchkt Only usefull if -lchk is selected. This concatenates the url request test to single line, so that you can view the URL: ERROR_CODE ERROR_TEXT in one line. (Usefull in Emacs compile buffer and truncate mode on) -mk 'A B C' Meta keywords. Include keywords AA BB CC here This is used by search engines. Separate kwywords AA BB CC with spaces and do not use comma anywhere. -md STR Meta Description. Include description string, max 1000 chars. This is used by search engines. -name-uniq When the NAME references are collected, first 1-5 first words are picked from the header. However, it is possible that you have two headers that use exactly the same words in the beginning. In those cases you have to turn on this option. It will use running counter 00 - 99.. instead of words from headers to construct NAME reference. Please use this option only in emergencies, because referring to NAME via this#header_name is more convenient than using reference this#11, where the number may change in the next run. Make sure that the headers don't have same subjects and you don't need this option at all. -name-nbr When constructing NAME refrences, the header numbering is tossed away by default so that heading's "1.0 Intro" NAME reference is "#Intro". This is derirable, because the numbering may change but the heading text usually remains the same. Uf you really want to have NAME reference like "#1.1_Intro" turn this option on. NOT RECOMMENDED. -pref Print references (contructed from header names) that build up the local NAME jump points in the document. The list is printed in stderr. 
This way you can do % $prgname tmp.txt > file.html and the reference names printed don't go to html file. -ref URL Refer-to Url location of file in destination site where the document will be put. -simple Print minimum footer. Only Contact and date. -quiet Print no footer at all. This option has different meaning if -lchk option is turned on. -t STR The title text that appears in Browser's top frame. -v Print program's version information. -db NBR Debug with LEVEL. Not supported in distributed version. Can only be run by maintainer. -h(elp)|-usage Print this help screen -help-sh Print help for sample shell script. EOF } print " >> $msg" if defined $msg; # why function was called exit 1; } # }}} # {{{ URL Link # ............................................................ &link ... #---------------------------------------------------------------------- sub StudyLinkExternal { # DESCRIPTION # # Check if link is valid # # INPUT # # str string containing the link or pure URL link # # RETURN # # 0 nbr Error code. Global %LINK_HASH is updated too # with key 'link' -- 'response' # local( $f ) = "$lib.StudyLinkExternal"; local( $url , *LINK_HASH , *LINK_HASH_CODE) = @_; local( $ret , $txt ) = 0; if ( $P5 ) { # Because this is perl 5 lib thing, we can't do this unless # the WWW lib is present. # use LWP::UserAgent; my $ua = new LWP::UserAgent; my $request = new HTTP::Request('GET', $url); my $obj = $ua->request($request); # check the outcome # if ($obj->is_success) { 0; } else { $ret = 1; $LINK_HASH{ $url} = $obj->code; # There is new error code, record it. # if ( ! 
defined $LINK_HASH_CODE{ $obj->code} ) { $txt = $obj->message; $LINK_HASH_CODE{ $obj->code} = $txt; } } } ($ret , $txt); } #---------------------------------------------------------------------- sub html2txt { # DESCRIPTION # # converts html files into ascii by just stripping anything between # < and > # written 4/21/96 by Michael Smith for WebGlimpse # # INPUT # # @txt text from the file # # RETURN # @ local( $f ) = "$lib.html2txt"; local( *input ) = @_; local( $carry, $line, @ret, $_, $comment); $carry = $comment = 0; for( @input ) { $line = $_; if (0) # enable comment stripping { $comment = 1 if //; $comment = 0 if /--->/; next if $comment; } if($carry==1) { # remove all until the first > # next if( $line !~ s/[^>]*>// ); # if we didn't do next, it succeeded -- reset carry # $carry=0; } while( $line =~ s/<[^>]*>//g ){}; if( $line =~ s/<.*$// ) { $carry=1; } $line = &XlatText( $line); push( @ret, $line); } @ret; } #---------------------------------------------------------------------- sub ReadLinks { # DESCRIPTION # # read external links # # INPUT # # @txt text from the file # # RETURN # % all found links 'line nbr' -- 'lnk' local( $f ) = "$lib.ReadLinks"; local( *txt ) = @_; local( $_ , $url, %ret, $i ); for ( @txt ) { $i++; $url = ""; # This used to read (ftp|http), but the ftp check does not # know GET request. # $url = $1 if m"((http)://[^\s\)\'\",;]+)"i; # Do not check the tar.gz links. or program calls perl?args # if ( $url =~ m"\.(gz|tgz|Z)$|\?" ) { warn " link will not be checked: $url"; next if m"\?"; # forget programs # but try to verify at least directory $url =~ s"(.*/)"$1"; } if ( $url ) { $D && print "$f: $i $url\n"; $ret{$i} = $url ; } } %ret; } #---------------------------------------------------------------------- sub Studylinks { # DESCRIPTION # # Check if TEXT contains no data. Empty, only whitespaces # or "none" word is considered empty text. 
# # INPUT # # $text string # # RETURN # # 0,1 local( $f ) = "$lib.Studylinks"; local( $FILE, *content ) = @_; local( %link, %errDesc, %linkErr ); local( $i, $_, $lnk, $i , $text, $status ); %link = &ReadLinks( *content ); $D && &pAsc( "$f", *link); $i = 0; for ( sort {$a<=> $b} keys %link ) { $i = $_; $lnk = $link{$_}; if ( ! $QUIET ) { print "$FILE:$i:$lnk "; } ( $status, $err ) = &StudyLinkExternal( $lnk , *linkErr, *errDesc ); $text = ""; if ( $ERR_TEXT_ONE_LINE ) { ( $text = $err ) =~ s/\n/./; } if ( ! $QUIET ) { print " $status $text\n"; } elsif ( $status != 0 ) { printf "$FILE:$i:%-4d $lnk $text\n", $status; } } } # }}} # {{{ Is, testing # ............................................................... &is ... #---------------------------------------------------------------------- sub IsEmptyText { # DESCRIPTION # # Check if TEXT contains no data. Empty, only whitespaces # or "none" word is considered empty text. # # INPUT # # $text string # # RETURN # # 0,1 local( $f ) = "$lib.IsEmptyText"; local( $_ ) = @_; local( $ret ) = 0; $ret = 1 if $_ eq "" || $_ =~ /^\s+$|none$/i; $ret; } #---------------------------------------------------------------------- sub IsHeader { # DESCRIPTION # # Return level of header and header text if header # # INPUT # # $line line # # RETURN # # 0,x level of header if it was header local( $f ) = "$lib.IsHeader"; local( $_ ) = @_; local( $level ) = 0; if ( /^[A-Z0-9]/ ) { $level = 1; }elsif ( /^ {4}[A-Z0-9.]/ ) { $level = 2; } $level } #---------------------------------------------------------------------- sub IsBullet { # DESCRIPTION # # # INPUT # # $line ,line # $text , returned, modified line text # # RETURN # # 0,x ,level local( $f ) = "$lib.IsBullet"; local( $_ , *text) = @_; local( $level ) = 0; if ( /^ {8}([o.]) {3}(.+)/ ) { $level = 1 if $1 eq "o"; $level = 2 if $1 eq "."; $text = $2; } $level; } # }}} # {{{ start, end #---------------------------------------------------------------------- sub PrintStart { # DESCRIPTION # # 
Creates the start of html document # # INPUT # # Too many to list here. See code. # # RETURN # # @ html lines local( $f ) = "$lib.PrintStart"; local( $doc, $author, $title, $base, $butt, $butp, $butn , $metaDesc, $metaKeywords ) = @_; local( @ret, $str , $tab , $tmp ); $tab = " "; # ................................................ start of html ... $str = "\n" . "\n\n" . "\n\n" . "$title\n\n" . "\n\n\n" ; push ( @ret, $str ); # ............................................. meta information ... # META tags provide "meta information" about the document. # http://www.htmlhelp.com/reference/wilbur/head/meta.html # if ( defined $metaKeywords ) { # "keywords" # Provides keywords for search engines such as Infoseek or Alta # Vista. These are added to the keywords found in the document # itself. If you insert a keyword more than seven times here, # the whole tag will be ignored! # push(@ret, " \n"); } if ( defined $metaDesc ) { push(@ret," \n"); } # ................................................. general meta ... # push(@ret, qq/ \n/ ); push(@ret, qq/ \n/); push(@ret, qq/ \n\n/); push(@ret, &makeComment("BUTTON DEFINITION START") ); if ( ! &IsEmptyText( $butp ) ) { $tmp = "Previous document"; # push(@ret, $tab . &makeLinkHtml("previous","$butp", $tmp) ); push(@ret, $tab . &makeUrlRef( $butp, "Previous", "but") ); push(@ret, "\n"); } if ( ! &IsEmptyText( $butt ) ) { $tmp = "The homepage of site"; # push(@ret, $tab . &makeLinkHtml("home","$butt", $tmp) ); push(@ret, $tab . &makeUrlRef( $butt, "home", "but") ); push(@ret, "\n"); } if ( ! &IsEmptyText( $butn ) ) { $tmp = "Next document"; # push(@ret, $tab . &makeLinkHtml("next","$butt", $tmp) ); push(@ret, $tab . 
&makeUrlRef( $butn, "Next", "but") );
        push(@ret, "\n");
    }

    push(@ret, "\n\n\n\n" );

    # &pArr("$f", *ret);
    @ret;
}

#----------------------------------------------------------------------
sub PrintEndQuiet
{
    # DESCRIPTION
    #
    #   Build the minimal document end block. printHtmlDoc() calls
    #   this when $SIMPLE_OUTPUT == 2.
    #
    # INPUT
    #
    #   $doc, $author, $ref, $file      accepted but not used here
    #
    # RETURN
    #
    #   @       the makeComment() marker followed by the closing string
    #
    # NOTE(review): the string below looks like it once carried the
    # closing html tags; only the newlines survive in this copy of the
    # file. Restore the markup from a pristine t2html.pls.

    local( $f ) = "$lib.PrintEndQuiet";
    local( $doc , $author, $ref , $file ) = @_;
    local( @ret, $str );

    push(@ret, &makeComment( "DOCUMENT END BLOCK") );

    $str = "\n\n" . "\n" . "\n" . "\n" ;

    push(@ret, $str );
    @ret;
}

#----------------------------------------------------------------------
sub PrintEndSimple
{
    # DESCRIPTION
    #
    #   Build the short document end block: contact address and the
    #   generation time stamp. printHtmlDoc() calls this when
    #   $SIMPLE_OUTPUT == 1.
    #
    # USES GLOBAL
    #
    #   $EMAIL
    #
    # INPUT
    #
    #   $doc, $author, $ref, $file      accepted but not used here
    #
    # RETURN
    #
    #   @       the makeComment() marker followed by the footer string

    local( $f ) = "$lib.PrintEndSimple";
    local( $doc , $author, $ref , $file ) = @_;
    local( @ret, $str, $date);

    # Get ISO 8601 standard time 1995-11-07 and HH:MM
    #
    # %M is minutes; the previous %m printed the month number again.

    $date = `date '+%Y-%m-%d %H:%M'`;
    chomp( $date );

    push(@ret, &makeComment( "DOCUMENT END BLOCK") );

    # NOTE(review): the raw line breaks inside the literals below are
    # where html tags appear to have been stripped from this copy of
    # the file; the text is kept exactly as found.

    $str = "\n\n" . "\n" . "
\n\n" . qq|Contact: <$EMAIL>
\n| . qq|Html \$Doc id: $date \$
\n| . "\n" . "\n\n" . "\n" . "\n" ;

    push(@ret, $str );
    @ret;
}

#----------------------------------------------------------------------
sub PrintEnd
{
    # DESCRIPTION
    #
    #   Build the full document end block: "End of <doc>" line,
    #   disclaimer, generator notice, author, url, contact address and
    #   the generation time stamp. printHtmlDoc() calls this when
    #   $SIMPLE_OUTPUT is neither 1 nor 2.
    #
    # USES GLOBAL
    #
    #   $EMAIL $VERSION $prgname
    #
    # INPUT
    #
    #   $doc        document name; "document" is used if empty
    #   $author     author name
    #   $ref        url of the document
    #   $file       [optional] file holding a replacement disclaimer
    #
    # RETURN
    #
    #   @       the makeComment() marker followed by the footer strings

    local( $f ) = "$lib.PrintEnd";
    local( $doc , $author, $ref , $file ) = @_;

    # Fall back to a generic word only when no name was given. The
    # previous test /\s*/ matched every string, so the caller's name
    # was always thrown away.

    $doc = "document" if !defined $doc || $doc =~ /^\s*$/;

    local( $date, @ret, $str , $disc, $F);

    # Get ISO 8601 standard time 1995-11-07 and HH:MM
    #
    # %M is minutes; the previous %m printed the month number again.

    $date = `date '+%Y-%m-%d %H:%M'`;
    chomp( $date );

    # disclaimer
    #
    # NOTE(review): the raw line breaks inside the literals in this
    # function are where html tags appear to have been stripped from
    # this copy of the file; the text is kept exactly as found.

    $disc = "\n
\n"
        . "This material can be publically distributed and copied "
        . "with the permission\n"
        . "of the Author, provided that you preserve the "
        . "Author's name and that you\n"
        . "distribute it in full and not partially. If you "
        . "quote parts of this\n"
        . "document, please always mention author's email "
        . "address or http reference\n"
        . "where to get the document you refered to."
        . "\n\n"
        ;

    # Read the disclaimer from separate file.
    #
    # The <F> read was missing in this copy, which left $disc empty
    # whenever a file was given.

    if( defined $file )
    {
        open(F,"$file") || die "$f: Can't open [$file]";
        $disc = join('', <F>);
        close F;
    }

    push(@ret, &makeComment( "DOCUMENT END BLOCK") );

    $str = "\n\n" . "\t End of $doc\n\n\n" . "\n" . "
\n\n" . "
" . $disc ;

    push(@ret, $str );

    $str = "\n\n
\n" . "This file has been automatically generated from plain text file\n"
        . "with perl 4 script v$VERSION $prgname" . "\n
" . "Document author: $author
\n" . qq|Url: $ref
\n| . qq|Contact: <$EMAIL>
\n| . qq|Html \$Doc id: $date \$
\n| . "\n" . "
\n\n" . "\n" . "\n" ;

    push(@ret, $str );
    @ret;
}

#----------------------------------------------------------------------
sub printHtmlDoc
{
    # DESCRIPTION
    #
    #   Prints the whole generated html with header and footer.
    #   The page is assembled as: PrintStart(), makeIndex(), the body
    #   lines, and one of the PrintEnd*() footers chosen by
    #   $SIMPLE_OUTPUT. The result goes to stdout and the number of
    #   input lines (@slurp) is reported on stderr.
    #
    # USES GLOBAL
    #
    #   Uses the globals defined with program switches:
    #   $DOC $AUTHOR $TITLE $BASE $REF $BUT_TOP $BUT_PREV $BUT_NEXT
    #   $MKEYWORDS $MDESC $SIMPLE_OUTPUT, and @HEADERS @HREFS @slurp
    #
    # INPUT
    #
    #   *arr        glob of the array holding the body html
    #   $file       accepted but not used here
    #
    # RETURN
    #
    #   nothing meaningful

    local( $f ) = "$lib.printHtmlDoc";
    local( *arr , $file ) = @_;
    local( @ret, $str );

    @ret = &PrintStart( $DOC, $AUTHOR, $TITLE , $BASE,
                        $BUT_TOP, $BUT_PREV, $BUT_NEXT,
                        $MKEYWORDS, $MDESC
                      );

    push( @ret, &makeIndex( *HEADERS, *HREFS ) );
    push( @ret, @arr);

    if ( $SIMPLE_OUTPUT == 1 )
    {
        push( @ret, &PrintEndSimple( $DOC, $AUTHOR, $REF ) );
    }
    elsif ( $SIMPLE_OUTPUT == 2 )
    {
        push( @ret, &PrintEndQuiet( $DOC, $AUTHOR, $REF ) );
    }
    else
    {
        push( @ret, &PrintEnd( $DOC, $AUTHOR, $REF ) );
    }

    # &pArr("$f", *ret);
    print @ret;

    warn "Lines: ", scalar( @slurp), "\n";
}

# }}}
# {{{ misc - make

#----------------------------------------------------------------------
sub ExpandTabs
{
    # DESCRIPTION
    #
    #   Replace tabs with spaces, tab stop every 8 columns. The
    #   elements of the passed array are modified in place.
    #
    #   From Perl5
    #   % perldoc Text::Tabs
    #
    # INPUT
    #
    #   *l      glob of the array of lines
    #
    # RETURN
    #
    #   @       the expanded lines

    local( $f ) = "$lib.ExpandTabs";
    local( *l ) = @_;
    local( $_ , $tabstop );

    $tabstop = 8;

    for ( @l )
    {
        # Expand the leftmost run of tabs, padding to the next tab
        # stop, and repeat until the line has no tabs left.

        1 while s/^([^\t]*)(\t+)/
                  $1 . (" " x ($tabstop * length($2)
                  - (length($1) % $tabstop)))
                  /e;
    }

    @l;
}

#----------------------------------------------------------------------
sub makeLinkHtml {
    # DESCRIPTION
    #
    #   Note, 1997-10, you should not use this function because
    #   a) netscape 3.0 doesn't obey LINK HREF
    #   b) If you supply LINK and normal HREF; then lynx would show both
    #      which is not a good thing.
    #   Let's just conclude, that LINK tag is not handled right
    #   in net browsers.
    #
    #
    #   Create html LINK tag
    #
    #   Advanced net browsers can use the included LINK tags.
    #   http://www.htmlhelp.com/reference/wilbur/alltags.html
    #
    #       REL="home": indicates the location of the homepage, or
    #       starting page in this site.
#
    #       REL="next"
    #       Indicates the location of the next document in a series,
    #       relative to the current document.
    #
    #       REL="previous"
    #       Indicates the location of the previous document in a series,
    #       relative to the current document.
    #
    # INPUT
    #
    #   $type       the value of REL
    #   $url        the value for HREF
    #   $title      An advisory title for the linked resource.
    #
    # RETURN
    #
    #   html string
    #
    # NOTE(review): the returned literal is only "\n" in this copy of
    # the file, so $type and $url are never used. The tag text appears
    # to have been stripped; restore it from a pristine t2html.pls.

    local( $f ) = "$lib.makeLinkHtml";
    local( $type, $url , $title ) = @_;

    if ( ! defined $title )
    {
        $title = "";
    }
    else
    {
        $title = qq|TITLE="$title"|;
    }

    qq|\n|;
}

#----------------------------------------------------------------------
sub makeUrlRef {
    # DESCRIPTION
    #
    #   Format a reference to an url. Type "but" gives the bracketed
    #   [text] form followed by a newline, anything else the plain
    #   text.
    #
    # INPUT
    #
    #   $ref        url reference or "none"
    #   $txt        text
    #   $type       "but" for button
    #
    # RETURN
    #
    #   html string
    #
    # NOTE(review): $ref is not used in either literal below; the
    # anchor markup appears to have been stripped from this copy.

    local( $f ) = "$lib.makeUrlRef";
    local( $ref, $txt , $type ) = @_;

    $type = "" if ! defined $type;

    if ( $type eq "but" )
    {
        qq|[$txt]\n|;
    }
    else
    {
        qq|$txt|;
    }
}

#----------------------------------------------------------------------
sub makeComment {
    # DESCRIPTION
    #
    #   Return a marker string used to label sections of the output.
    #
    # INPUT
    #
    #   $txt        comment text
    #
    # RETURN
    #
    #   string
    #
    # NOTE(review): as found, the argument is overwritten with two
    # newlines and never appears in the result; the comment markup
    # appears to have been stripped from this copy.

    local( $f ) = "$lib.makeComment";
    local( $txt ) = @_;

    $txt = "\n\n" ;
}

#----------------------------------------------------------------------
sub makeIndex {
    # DESCRIPTION
    #
    #   Build the table of contents from the header lines. Lines that
    #   start with spaces followed by A-Z or 0-9 are treated as
    #   sub-entries ($ul counts how many are open in a row); other
    #   lines close the sub-entry run.
    #
    # USES GLOBAL
    #
    #   $DOC        printed in the title
    #   @HREFS      read by index, in step with the header lines
    #   $D          debug flag
    #
    # INPUT
    #
    #   *arr        glob of the array of header lines. The caller in
    #               printHtmlDoc() also passes *HREFS, which is not
    #               picked up here; the global @HREFS is read directly.
    #
    # RETURN
    #
    #   @           html lines
    #
    # NOTE(review): the raw line breaks inside the literals below, the
    # empty qq|| and the empty $styleb/$stylee values are where html
    # tags appear to have been stripped from this copy of the file.

    local( $f ) = "$lib.makeIndex";
    local( *arr ) = @_;
    local( $_ , $i , $txt, $spc, $li, $ul , $refname );
    local( $styleb, $stylee , @ret , $str );

    $styleb = $stylee = "";

    # Disabled alternative style
    if ( 0 )
    {
        $styleb = "";
        $stylee = "";
    }

    push( @ret , &makeComment( "TABLE OF INDEX START") );

    # According to spec, the SMALL tag is new and old browsers
    # only understand FONT="-1".

    $str = qq|\n| . qq|
Table of contents $DOC
| . "
" ;

    push( @ret, $str );

    $ul = $i = 0;

    for ( @arr )
    {
        $refname = $HREFS[$i];
        # print "\n" if ! /^\s+/;
        $i++;

        # $spc keeps the original indentation, $txt the header text
        $spc = "";
        $spc = $1 if /^(\s+)/;
        $txt = $1 if /^\s*(.*)\s*$/;

        $li = $str = "";

        if ( /^ +[A-Z0-9]/ )
        {
            # Indented header: open the sub-list on the first one
            $str = "
\n" if $ul == 0;
            $li = "\t
";
            $ul++;
        }else{
            # Back at top level: close a sub-list if one was open
            $str = "
\n" if $ul != 0;
            $ul = 0;
        }

        $str .= "$spc$styleb" . "$li" . qq|| . "$txt" . "" . "$stylee
\n" ;

        push( @ret, $str);
    }

    $str = "
\n\n" . &makeComment( "TABLE OF INDEX END") ;

    push( @ret, $str);

    $D && &pArr( "$f", *ret);
    @ret;
}

# }}}
# {{{ misc

#----------------------------------------------------------------------
sub killToc {
    # DESCRIPTION
    #
    #   Remove heading "Table of contents" and its content. Lines are
    #   dropped from the heading up to, but not including, the next
    #   line that starts with A-Z or 0-9 in the first column.
    #
    # INPUT
    #
    #   *arr        glob of the array holding the whole text
    #
    # RETURN
    #
    #   @           modified text

    local( $f ) = "$lib.killToc";
    local(*arr ) = @_;
    local( $_ , @ret, $flag);

    for ( @arr )
    {
        $flag = 1 if /^Table\s+of\s+contents\s*$/i;

        if ( $flag )
        {
            # save next header
            #
            # The heading line itself also starts with a capital, so
            # it must be skipped before the "next header" test.
            next if /^Table/;

            if ( /^[A-Z0-9]/ )
            {
                $flag = 0;
            }else{
                next;
            }
        }

        push( @ret, $_);
    }

    @ret;
}

#----------------------------------------------------------------------
sub UpdateHeaderArray {
    # DESCRIPTION
    #
    #   After you have checked that line is header with IsHeader()
    #   the line is sent to here. It reformats the line and
    #
    #   o   Constructs 1-5 first words to form the TOC NAME reference
    #   o   Updates globals HEADERS AHREFS HREFS
    #
    # INPUT
    #
    #   $line       header line
    #
    # RETURN
    #

    local( $f ) = "$lib.UpdateHeaderArray";
    local( $_ ) = @_;
    local( $name , $orig , $w , $ws ) ;

    $D && print "$f: $_\n";

    $name = $orig = $_ ;
    $w = "[.\\w]+";             # The word.
    $ws = "$w\\s+";             # The word and space

    push( @HEADERS, $_ );
    $HEADER_COUNTER++;

    # When constructing names, the numbers may move,
    # So it's more logical to link to words only when making NAME ref.
    #
    # 11.0 Using lambda notation --> Using lambda notation

    s/[0-9][0-9.]*// if $FORGET_HEAD_NUMBERS;

    # Kill characters that we don't want to see in NAME reference
    #
    # NOTE(review): the class also deletes the digits 8 and 9 as
    # written; confirm against a pristine copy that this is intended.

    s/[-+,:;!\"#%&89=?^{}()?!\\~*'|]//g;

    # warn ">>$f: $_\n";

    if ( defined $NAME_UNIQ )
    {
        # Use the running header number as the name
        $_ = "$HEADER_COUNTER";
    }
    else
    {
        # Pick first 1-5 words for header name
        #
        if ( /($ws$ws$ws$ws$w)/o ||
             /($ws$ws$ws$w)/o ||
             /($ws$ws$w)/o ||
             /($ws$w)/o ||
             /($w)/o
           )
        {
            $name = $1
        }

        $_ = $name;
        s/^\s+//; s/\s+$//;     # strip trailing spaces
        s/\s/_/g;
        s/(.*)/\L$1\E/;         # only lowercase
    }

    if ( !defined $AHREFS{$_} ) # is the 1-5 enough to identify?
{ $AHREFS{$_} = 1 # add new } else { print "$f: $AHREFS{$_}"; PRINT:{ warn <$butt\n"; if ( $level == 1 ) { $ret = "\n" . "\t\n" . "

\n$header\n$butt
\n" ; } elsif ( $level > 1 ) { $ret = "\t \n" . "
\n$header\n$butt
\n" ; } $ret; } #---------------------------------------------------------------------- sub XlatHtml { # DESCRIPTION # # Translate some characters into Html codes. # # INPUT # # line # # RETURN # # html line local( $f ) = "$lib.XlatHtml"; local( $_ ) = @_; s/\&/&/ig; s/>/>/ig; s//ig; s/</ html # ------------------------------------------------------- &doTheLine --- sub DoLine { # DESCRIPTION # # Add heml tags per line basis # # INPUT # # line # # RETURN # # formatted line # local( $f ) = "$lib.DoLine"; local( $_ , $base, $line, *arr ) = @_; ! defined $_ && warn "$f: \$_ not defined? "; ! defined $line && warn "$f: LINE not defined? "; # This is very odd, Has happened to me sometimes that $_ # is not defined when this function is called. Should # track down the error sometime... return "" if ! defined $_ ; local( $i, $url , $s1, $s2 , $s3, $s4, $s5 ); local( $prev, $prev2, $next , $isPrevHdr ); local( $bulletText , $origLine ); local( $isText, $AsIs , $isBullet, $isBrCode , $isPcode ); local( $prevEmpty, $nextEmpty , $hname, $hlevel ); local( $tmp , $isPureText); chop( $_ ); $origLine = $_; $prev = $prev2 = $next = ""; # HEADER <-- search this # # text starts here # $prev2 = $arr[ $line -2] if $line > 1; $prev = $arr[ $line -1] if $line > 0; $next = $arr[ $line +1] if $line +1 < @arr ; $isText = $AsIs = $isPrevHdr = $isBullet = 0; $isBrCode = $hlevel = $isPcode = 0 ; $bulletText = ""; $isPrevHdr = &IsHeader( $prev2 ) if $line > 1; $prevEmpty = 0; $prevEmpty = 1 if $prev =~ /^\s*$/; $nextEmpty = 0; $nextEmpty = 1 if $next =~ /^\s*$/; $i = -1; $isPureText = 1 if /^ {12}/; # ................................................. command tags ... if ( /^( {1,11})\.([^ \t.].*)/ ) { # The "DOT" code at the beginning of word. 
Notice that the dot # code is efective only in columns 1..11 # warn "BR $line <$_>\n"; $isBrCode = 1; $s1 = $1; $s2 = $2; $_ = "$s1$s2"; # Remove the DOT control code } if ( /^( +),([^ \t,].*)/ ) # The "P" tag { $isPcode = 1; $s1 = $1; $s2 = $2; $_ = "$s1" . substr($s2, 1); # warn "P $line $_\n"; } if( /#URL-BASE/ ) { # warn ">> $_"; $Base = 1 s"#URL-BASE"$base"gi; } $_ = &XlatHtml( $_); # .......................................................... &ref ... # Handle #REF command if ( /(.*)#REF\s+(.*)\s*;(.*);(.*)/ ) { $s1 = $1; $s2 = $2; $s3 = $3; $s4 = $4; # There already may be absolute reference, check it first # # http:/www.this.com#referece_here # # $s2 = "#$s2" if ! /(\#REF.*\#)/ && /ftp:|htp:/; $_ = $s1 . &makeUrlRef( $s2, $s3 ) . $s4; # warn "#REF--> [$s1]\n [$s2]\n [$s3]\n [$_]"; } # ......................................................... &url ... # The A HREF test may be added in #REF substitutin already, then do # not call these $tmp = $s5 = ""; while ( m,(.*)([^fh\"<>]+)((file|ftp|http|telnet):/[^ \t\n]+)(.*), ) { $s1 = $1; $s2 = $2; $s3 = $3; $s4 = $4; $s5 = $5; $url = $3; # warn "[1:$1] [2:$2] [3 url:$3] [4:$4] [5:$5]\n[line:$_]\n"; # If there is no more to match, quit # if ( $5 ){ $_ = $5; }else{ $_ = "";}; # Ignore following # # -file:/ Leading dash # -http:/$THIS_EXAMPLE_VAR no dollars # # if ( "$s2$s3" =~ m"-(file|ftp|http|telnet)"i || $url =~ m"foo|ba[rz]|quu"i || $url =~ m"\$" ) { $tmp .= "$s1$s2$url"; }else{ $tmp .= "$s1$s2" . &makeUrlRef($url,$url) . ""; } } # $s5 is the part that was left over. It didn't contain any more # url references, so we put it back. if ( $tmp ne "" ) { if ( $s5 ne "" ) { $_ = "$tmp$s5" } else { $_ = $tmp; } } #warn "<1< $_" if $Base; $Base = 0; # ........................................................ &mail ... # Handle Mail references, we need while because there may be # multiple mail addresses on the line # # A special case; in text there may be written like these. They are NOT # clickable email addresses. 
# # References: <5dfqlm$m50@basement.replay.com> # Message-ID: <5dko56$1lv$1@news02.deltanet.com> $tmp = ""; while ( m"(.*)<([^ \t<>]+@[^ \t<>]+)>(.*)" ) { $tmpLine = $_; $s1 = $1; $s2 = $2; $s3 = $3; # warn "[1:$1] [2:$2] [3:$3]\n"; # If there is no more to match, quit if ( $3 ){ $_ = $3; }else{ $_ = "";}; # Ignore certain email addresses like # foo@site.com bar@site.com ... that are used as examples # in the document. # # Ignore also any address that is like # - Leading dash # < addr@site.com> space follows character < if ( $tmpLine =~ m"\$[A-Za-z0-9]+@" || m"(foo|ba[rz]|quu[zx]|none|example|test)@" || m"-\<.*@" || m"\< +.*@" ) { $tmp .= "$s1$s2"; }else{ $tmp .= "$s1" . &makeUrlRef("mailto:$s2",$s2) . ""; } } $_ = "$tmp$s3" if $tmp ne ""; # ......................................................... &rcs ... # RCS keywords if ( m"(.*)(\$Id.*\$)(.*)" ) { $_ = "$1$2$3"; } # The bullet text must be examined only after the expansions # in the line $isBullet = &IsBullet( $_ , *bulletText ); # ................................................... study line ... # if ( /^ {4,}(!!)([^!].*)?$/ ) { # A special !! code means adding
tag # # if (defined $2 ) { $_ = "\n
\n\t $2
\n"; } else { $_ = "\n
\n\t
\n"; } } elsif ( $hlevel = &IsHeader($_) ) { $hname = &UpdateHeaderArray( $_); $_ = &makeHeaderHtml( $_, $hname, $hlevel ); }elsif ( /^ {12,}[^ ]/ && !$BULLET_MODE && !$isBullet ) { $AsIs = 1; # Make it little shorted by removing spaces # Otw, the indent level is too deep # $_ = substr( $_, 6); $_ = "$_"; }elsif ( /^ {1,3}[^ ]/ ) { $AsIs = 1; $_ = "$_"; }elsif ( /^ {7}\"/ ) { $_ = "$_"; }elsif ( /^ {5,7}[^ ]/ ) { $_ = "$_"; }elsif ( /^ {9}[^ ]|^ {11}[^ ]/ ) { $_ = "$_"; }elsif ( /^ {10}[^ ]/ ) { $_ = "$_"; }elsif ( /^( {8})([^ ].*)/ ) { $isText = 1; } # ...................................................... bullets ... # $D = 2; $D == 2 && warn "$line: empty $nextEmpty br $isBrCode " . "isBullet $isBullet " . "mode $BULLET_MODE <$_> <$next> \n"; if ( $isBullet && $prevEmpty ) { $s1 = "
"; $s1 = "
" if $isBullet > 1; $_ = "$s1\n\t
$bulletText"; $BULLET_MODE = 1; $isBullet = 0; $D == 2 && warn "BULLET ON $_\n"; } if ( ( $isBullet || $BULLET_MODE ) && $nextEmpty ) { $s1 = "
"; $s1 = "" if $isBullet > 1; $_ = "
$bulletText" if $isBullet; if ( !$isPcode ) { # if previous paragraph doesn't contain P code, # then terminate this bullet $BULLET_MODE = 0; $_ = "\t$_\n$s1\n\n"; } else { $_ = "\t$_\n
\n"; # Continue in bullet mode } $isBullet = 0; $D == 2 && warn "BULLET OFF $_\n"; } if ( $isBullet ) { $_ = "\t
$bulletText"; 0 && warn "BULLET $_\n"; } if ( !( $isBullet || $BULLET_MODE ) && $nextEmpty ) { # $_ = "$_\n
"; } # ...................................... determining line context ... # # If this is column 8, suppose regular text see if this # is begining or end of paragraph. if ( $line > 0 && $prevEmpty && !$isPrevHdr && !$hlevel && !$isBullet && !$BULLET_MODE # these tags do not need P tag, otw line doubles && ! /PRE|SAMP/i # If user has not prohibited using P code && !$isPcode ) { # if this the above line was header, we must not insert P tag, # because it would double the line spacing # warn "P1>> !$isBullet && !$BULLET_MODE [$_]"; $_ = "
\n$_"; } if ( $nextEmpty && !$isBullet && !$BULLET_MODE ) { # $_ = "$_\n
"; } if ( $line > 0 && $AsIs && $prevEmpty ) { $_ = "
\n$_"; } if ( $AsIs && $nextEmpty ) { $_ = "$_\n
"; } # _WORD_ is strong # *WORD* is emphatized # The '_' must preceede whitespace and '>' which could be # html code ending character. # # We must loop 3 times because the markup may be nested # _*=WORD=*_ --> _ --> * --> = --> html completed # # Turn american english `this-function' references into samples too. # This is convention that Emacs documentation uses. s"([ \t>])\`([^ \t]+)\'"$1$2"gi; # but Do not touch "code" text above 12 column if ( !$AsIs ) { if ( /[ \t>][_*=][^ \t]+[_*=]/ ) { # warn "$_"; # We loop only if the line matched. for ($i=0; $i< 3; $i++) { s"([ \t>])\_([^ \t]+)\_"$1$2"gi; s"([ \t>])\*([^ \t]+)\*"$1$2 "gi; s"([ \t>])\=([^ \t]+)\="$1$2 "gi; } } # If already has /P then do nothing. if ( $isBrCode && ! m"
" ) { $_ .= "
"; } } # die if /exaple/; "$_\n"; } # }}} # {{{ Args parsing # ............................................................. &args ... &Initialize; local ( $DOC , $TITLE , $REF , $AUTHOR) = ("", "", "", "") ; local ( $MDESC , $MKEYWORDS ) = ("" , "") ; local ( $BUT_TOP , $BUT_PREV , $BUT_NEXT ) = ( "" ,"", "") ; local ( $doShift) = 1; local ( $SIMPLE_OUTPUT, $QUIET) = 0; local ( $PREF ) = 0; local ( $BULLET_MODE ) = 0; local ( $FORGET_HEAD_NUMBERS ) = 1; local ( $ERR_TEXT_ONE_LINE ) = 0; local ( $LINK_CHECK ) = 0; local ( %LINK_HASH ) = (); # Links that are invalid: 'link' -- errCode local ( %LINK_HASH_CODE ) = (); # Error code table: errCode -- 'text' # Check if running under Perl5. # local ( $P5 ) = 0; { local ( $stat ) = eval "use LWP::UserAgent;"; unless ( $@ ) { $P5 = 1; 0 && $stat; # No-op, perl -w silencer 0 && &isLinkOk # Same, unused function otw. } } # .................................................... &ARGV-parsing ... while ($ARGV[0] =~ /^-/) { $_ = shift if $doShift; # shift may be set to 0 to $doShift = 1; # force multiple evaluation. $D && print "[$_]\n"; if (/^-db$/) { # debug $D = shift; print "#debug $D " if $D; } elsif (/^-a$/) { $AUTHOR = shift; } elsif (/^-butt$/) { $BUT_TOP = shift; } elsif (/^-butp$/) { $BUT_PREV = shift; } elsif (/^-butn$/) { $BUT_NEXT = shift; } elsif (/^-base$/) { $_ = shift; s"\n""g; # No newlines # If direct /users/foo/dir given, treat as file:/... # access protocol # $_ = "file:$_" if m"^/"; $BASE = $_; # To ensure that we really get filename # die "Base must have trailing slash [$BASE] " if $BASE =~ m"/$"; # Exclude the filename part # $BASE_URL = $1 if $BASE =~ m,(.*)/,; } elsif (/^-del$/) { $DEL = shift; } elsif (/^-delfld$/) { # # Delete Emacs folding.el marks that keeps text in sections. # $DEL = "^(# )?\{\{\{|^(# )?\}\}\}" } elsif (/^-doc$/) { $DOC = shift; } elsif (/^-e$/) { $EMAIL = shift; } elsif (/^-lchk$/) { if ( ! $P5 ) { warn "Need perl 5 LWP::UserAgent to check links. 
-lchk ignored."; }else{ $LINK_CHECK = 1; } } elsif (/^-lchkt$/) { $ERR_TEXT_ONE_LINE = 1; } elsif (/^-md$/) { $MDESC = shift; die "-md needs description text" if $MDESC =~ /^-|^\s*$/; } elsif (/^-mk$/) { $MKEYWORDS = shift; die "-mk needs description text" if $MKEYWORDS =~ /^-|^\s*$/; } elsif (/^-name-uniq$/) { $NAME_UNIQ = 1; } elsif (/^-name-nbr$/) { $FORGET_HEAD_NUMBERS = 0; } elsif (/^-pref$/) { $PREF = 1; } elsif (/^-ref$/) { $REF = shift; } elsif (/^-simple$/) { $SIMPLE_OUTPUT = 1; } elsif (/^-t$/) { $TITLE = shift; } elsif (/^-quiet$/) { $SIMPLE_OUTPUT = 2; $QUIET = 1; } elsif (/^-v$/) { print "$VERSION $prgname $CONTACT\n"; exit; } elsif (/^--/) { last; # no more options,break out } elsif (/^-h(elp)?|-u(sage)?/i) { # help &usage; } elsif (/^-help-sh$/) { print ; exit 1; } else { die "Unrecognized switch: $_\n"; } } # }}} # {{{ Main # ............................................................ &main ... local( $f) = "$lib.man"; if (! defined $EMAIL ) { $EMAIL = $ENV{'EMAIL'} || die "setenv EMAIL to foo\@site.com"; } if ( $EMAIL !~ /^\s*$/ ) # Not empty, continue { if ( $EMAIL !~ /@/ || $EMAIL =~ /[<>]/ ) { warn "Invalid EMAIL, must not contain characters [<>]\n", "or you didn't give \@\n" ; die "Example: me\@site.com"; } } # ................................................... read file ... $D && warn "ARGV: @ARGV\n"; $DIR = `pwd`; chop $DIR; $FILE = "stdin"; $FILE = "$DIR/$ARGV[0]" if @ARGV; @slurp = <> if !@slurp; # ..................................................... html2txt ... # - If text contains tag in the begining of file then automatically # convert the input into text if ( grep( //i, @slurp[0 .. 100] ) ) { warn "$f: was HTML page\n"; @slurp = &html2txt( *slurp ); } # We can't remove TOC if link check mode is on, because then the line # numbers reported wouoldn't match the original if TOC were removed. # if ( ! 
$LINK_CHECK ) { @slurp = &killToc( *slurp); } # Prevent processing empty files # @slurp < 2 && die "$prgname [@ARGV] not enough input lines"; # Should we ignore some lines according to regexp ? # # @slurp = grep( !/$DEL/, @slurp ) if defined $DEL; @slurp = &ExpandTabs( *slurp ); $HEADER_COUNTER = 0; $i = 0; if ( $LINK_CHECK ) { &Studylinks( $FILE, *slurp); exit; } else { for ( @slurp ) { $line = &DoLine( $_ , $BASE_URL, $i++, *slurp ); push( @arr, $line ); } } &printHtmlDoc( *arr ); # }}} # ............................................................. &end ... 0; __END__ #!/bin/csh -f # # FILE ID # # @(#) HTML Front end to t2html.pls script. Make html pages # # program : t2h # Docid: 1997-09-14 Jari Aalto # Contactid: # Id: t2html.pls,v 1.45 1997/10/13 19:06:38 jaalto Exp jaalto # # # DOCUMENTATION # # This is shell script that defines all the needed parameters for # t2html.pls perl program which converts text documents into html. # # Program is called in following manner # # %t2h DOCUMENT .. [SITE] # # DOCUMENT document identifies charaters # SITE can be "local" or missing # # The DOCUMENT parameter is some characters that match the 'if' # cases in this shell script; they select specific document for # formatting. The SITE is the destination site where # the document will be prepared. The SITE defaults to the # primary document location. # # ENVIRONMENT VARIABLES # # none # We set this here and don't read it from ENV, because the address there # is not what I want into my documents. # setenv EMAIL "jari.aalto@poboxes.com" # Where to put the document # if ( "$argv[$#argv]" == "local" ) then # Put document into this local host, into my directory # set url = file:$HOME/txt else # Not given, use default destination # set url = ftp://cs.uta.fi/pub/ssjaaa endif # Initial parameters for t2html.pls # set oauthor = "Jari Aalto" set options = ( $* ) if ( "$1" == "reg" ) then # REGULAR update, update these files. 
# set options = ( "j" "e" "gp" "gc" "gk" "gf" ) endif if ( "$1" == "all" ) then # REGULAR update, update these files. # "tdoc" set options = ( "j" "e" "tipgp" "rnym" "gp" "gc" "gk" "gf" ) endif foreach arg ( $options ) set author = "$oauthor" # original set butt = "$url/ssjaaa.html#links_to_documents_in" set butp = "none" set butn = "none" set title = "" set kwd = "" set addopt = "" set f = "" # .............................................................. pgp ... # PGP documents if ( "$arg" == "tipgp" ) then set f = tipgp.txt set kwd = "pgp unix emacs elisp" set title = "UTA - Jari's TinyPgp.el, Unix Emacs PGP packages" # set butn = "ema-keys.html"; endif # ........................................................... remail ... if ( "$arg" == "rnym" ) then set f = remail-nym-hlp.txt set author = "admin@nym.alias.net" set kwd = "anonymous remailer help problems" set title = "UTA - Jari's remail, troubleshooting nym.alias.net" # set butn = "ema-keys.html"; endif if ( "$arg" == "tdoc" ) then set f = tiny-docs.txt set kwd = "elisp Emacs packages documentation" set title = "UTA - Jari's tiny tool packagfe documentation" set addopt = "-name-uniq" endif # ........................................................... guides ... 
if ( "$arg" == "e" ) then set f = elisp.txt set kwd = "homepage emacs elisp tiny-tools packages" set title = "UTA - Jari's Emacs elisp readme, tips and instructions" set butn = "ema-keys.html"; endif if ( "$arg" == "gk" ) then set f = ema-keys.gui set kwd = "emacs lisp keybindings guide xmodmap xterm connecting" set title = "UTA - Jari's Emacs keybinding guide" set butp = "elisp.html"; set butn = "ema-font.html"; endif if ( "$arg" == "gf" ) then set f = ema-font.gui set kwd = "emacs lisp font faq" set title = "UTA - Jari's Emacs font setting guide" set butp = "ema-keys.html"; set butn = "ema-code.html"; endif if ( "$arg" == "gc" ) then set f = ema-code.gui set kwd = "emacs lisp code guide style" set title = "UTA - Jari's Emacs Elisp code guidelines" set butp = "ema-keys.html"; set butn = "ema-pkg.html"; endif if ( "$arg" == "gp" ) then set f = ema-pkg.gui set kwd = "emacs lisp package guide" set title = "UTA - Jari's Emacs package writing checklist." set butp = "ema-code.html"; endif # ............................................................ other ... if ( "$arg" == "j" ) then set f = ssjaaa.txt set kwd = "homepage ftp links pgp" set title = "UTA - Jari's Ftp directory top page" set butt = "none"; endif if ( "$arg" == "x" ) then set f = pgp-xhd.txt set kwd = "homepage pgp X-pgp standard" set title = "UTA - Jari's Email X-Pgp header standard proposal" endif # No file given, exit. # if ( ! $?f ) then echo "ARGS: j r x e gk gc gp gf tdoc rnym tipgp"; exit endif if ( "$f" != "" ) then set FILE = $f:r set EXT = $f:e set ref = $url/$f set refh = $url/$FILE.html set fileIn = $HOME/txt/$f set fileOut = $HOME/txt/$FILE.html perl -w ~/bin/t2html.pls \ -delfld \ -db 0 \ $addopt \ -t "$title" \ -a "$author" \ -mk "$kwd" \ -butp "$butp" \ -butt "$butt" \ -butn "$butn" \ -base $refh \ -doc $refh \ -ref $ref \ $fileIn \ > $fileOut # Where is the ready file # echo file:$fileOut endif end # End of file