#!/usr/local/bin/perl
'di';
'ig00';
# $Id: latex2html,v 1.49 1998/03/02 08:53:50 latex2html Exp $
#
# This variable can be set manually or by the installation script
# to point to the DIRECTORY where the latex2html files can be found.

$LATEX2HTMLDIR='/home/payne/programs/latex2html'; # Inserted by installation script

# $dirs tracks how far the automatic configuration got:
#   unset - Override.pm was not used, Unix defaults are installed by the
#           fallback branch;
#   1     - Override.pm was loaded;
#   2     - as 1, and $LATEX2HTMLDIR is already named in PERL5LIB/PERLLIB.
local($dirs);
if ($] >= 5 ) {# we have Perl 5.0 or later...
    $dirs = 1;
    # All operating system depended stuff should be defined in
    # Override.pm before being used in the code.
    # See export.pm on how to do this

    # This path is set similarly to the value of the variable above.
    use lib "/home/payne/programs/latex2html"; # Inserted by installation script
    #use Override qw(getpwuid link setenv getenv symlink rename
    use Override qw(link setenv getenv symlink rename
        make_directory_absolute unlink $dd $envkey $image_pre $DEBUG);

    # Plain substring test: the installation path is data, not a pattern,
    # so it must not be interpolated into a regular expression.
    if ((index($ENV{'PERL5LIB'}, $LATEX2HTMLDIR) >= 0)
        || (index($ENV{'PERLLIB'}, $LATEX2HTMLDIR) >= 0)) { $dirs = 2; }
}

if (! $dirs) {
    print "\n*** could not configure automatically using Override.pm ***";
    print "\n*** so assuming standard Unix conventions apply ***\n";
    # With Perl 5 these should have been set already, within Override.pm

    # The key, which delimits expressions defined in the environment
    # depends on the operating system.
    $envkey=':' unless ($envkey);

    # $dd is the directory delimiter character
    $dd = '/' unless ($dd);

    # Fallback implementation of make_directory_absolute.
    # Takes one path; when it does not start with the directory delimiter,
    # changes into it, asks getcwd for the resulting directory, and changes
    # back.  Returns the (possibly rewritten) path.
    sub old_make_directory_absolute {
        local($path) = @_;
        local($orig_cwd);
        if (! ($path =~ /^\Q$dd\E/)) { # if $path doesn't start with '/'
            $orig_cwd = &getcwd;
            chdir $path;
            $path = &getcwd;
            chdir $orig_cwd;
        }
        $path;
    }

    # Only install the fallback when nothing else defined the sub.
    eval "sub make_directory_absolute {\&old_make_directory_absolute(\@_)}\n"
        unless (defined &make_directory_absolute);
}

# make sure the $LATEX2HTMLDIR is on the search-path for forked processes
$ENV{'PERLLIB'} .= "$envkey$LATEX2HTMLDIR" if ($dirs < 2);
undef $dirs;

use Cwd;

#
# Comprises patches and revisions by various authors:
#   See Changes, the log file of LaTeX2HTML.
# # Original Copyright notice: # # LaTeX2HTML by Nikos Drakos # **************************************************************** # LaTeX To HTML Translation ************************************** # **************************************************************** # LaTeX2HTML is a Perl program that translates LaTeX source # files into HTML (HyperText Markup Language). For each source # file given as an argument the translator will create a # directory containing the corresponding HTML files. # # The man page for this program is included at the end of this file # and can be viewed using # %nroff -man latex2html # # For more information on this program and some examples of its # capabilities see the accompanying documentation in the docs/ # directory, or # # http://www-dsed.llnl.gov/files/programs/unix/latex2html/manual/ # # or # # http://www.cbl.leeds.ac.uk/nikos/tex2html/doc/latex2html/ # # Written by Nikos Drakos, July 1993. # # Address: Computer Based Learning Unit # University of Leeds # Leeds, LS2 9JT # # Copyright (c) 1993. All rights reserved. # # See general license below. # # **************************************************************** # General License Agreement and Lack of Warranty ***************** # **************************************************************** # # This software is distributed in the hope that it will be useful # but WITHOUT ANY WARRANTY. The author(s) do not accept responsibility # to anyone for the consequences of using it or for whether it serves # any particular purpose or works at all. No warranty is made about # the software or its performance. # # Use and copying of this software and the preparation of derivative # works based on this software are permitted, so long as the following # conditions are met: # o The copyright notice and this entire notice are included intact # and prominently carried on all copies and supporting documentation. # o No fees or compensation are charged for use, copies, or # access to this software. 
# You may charge a nominal
# distribution fee for the physical act of transferring a
# copy, but you may not charge for the program itself.
# o If you modify this software, you must cause the modified
# file(s) to carry prominent notices (a Change Log)
# describing the changes, who made the changes, and the date
# of those changes.
# o Any work distributed or published that in whole or in part
# contains or is a derivative of this software or any part
# thereof is subject to the terms of this agreement. The
# aggregation of another unrelated program with this software
# or its derivative on a volume of storage or distribution
# medium does not bring the other program under the scope
# of these terms.
#
# This software is made available AS IS, and is distributed without
# warranty of any kind, either expressed or implied.
#
# In no event will the author(s) or their institutions be liable to you
# for damages, including lost profits, lost monies, or other special,
# incidental or consequential damages arising out of or in connection
# with the use or inability to use (including but not limited to loss of
# data or data being rendered inaccurate or losses sustained by third
# parties or a failure of the program to operate as documented) the
# program, even if you have been advised of the possibility of such
# damages, or for any claim by any other party, whether in an action of
# contract, negligence, or other tortious action.
#
# Please send bug reports, comments, questions and suggestions to
# nikos@cbl.leeds.ac.uk. We would also appreciate receiving any changes
# or improvements you may make.
#
############################# System Parameters ##########################
#
# Uncomment the following statement if your Linux system requires it.
# use GDBM_File;

# change these whenever you do a patch to this program and then
# name the resulting patch file accordingly
$TPATCHLEVEL = " release";
$TVERSION = "98.1p1";
$RELDATE = "(March 2nd, 1998)";
$TEX2HTMLV_SHORT = $TVERSION . $TPATCHLEVEL;

$TEX2HTMLVERSION = $TEX2HTMLV_SHORT . ' ' . $RELDATE;
$TEX2HTMLADDRESS = "http://www-dsed.llnl.gov/files/programs/unix/latex2html/manual/";
$AUTHORADDRESS = "http://cbl.leeds.ac.uk/nikos/personal.html";

# Set $HOME to the environment variable.
$HOME = $ENV{'HOME'} || (getpwuid($<))[7];
push(@INC,$HOME);

# flush stdout with every print -- gives better feedback during
# long computations
$| = 1;

# set Perl's subscript separator to LaTeX's illegal character.
# (quite defensive but why not)
$; = "\000";

# No arguments!!
(&usage && die "No files to process!\n") unless @ARGV;

die ("LaTeX2HTML must be installed first before usage:\n".
     "Please run installation script.\n") unless $LATEX2HTMLDIR;

# Image prefix ( see Override.pm for $image_pre )
$IMAGE_PREFIX = ((defined $image_pre)? $image_pre : '_image');

# Author address
@address_data = &address_data('ISO');
$ADDRESS = "$address_data[0]\n$address_data[1]";

# ensure non-zero defaults
$MAX_SPLIT_DEPTH = 4 unless ($MAX_SPLIT_DEPTH);
$MAX_LINK_DEPTH = 4 unless ($MAX_LINK_DEPTH);
$TOC_DEPTH = 4 unless ($TOC_DEPTH);

# Read latex2html.config
require("$LATEX2HTMLDIR/latex2html.config") if
    ((-f "$LATEX2HTMLDIR/latex2html.config") ||
     die "LaTeX2HTML has not been installed correctly:".
     "\nCould not find file $LATEX2HTMLDIR/latex2html.config\n");

# Read .latex2html-init file if one is found
if (-f "$HOME$dd.latex2html-init") {
    print "\nloading $HOME$dd.latex2html-init";
    require("$HOME$dd.latex2html-init");
    die "You have an out-of-date " . $HOME .
        "$dd.latex2html-init file.\nPlease update or delete it.\n"
            if ($DESTDIR eq '.');
}

# Read .latex2html-init file if one is found in current directory
if ( (! (&getcwd eq $HOME )) && (-f ".$dd.latex2html-init")) {
    print "\nloading .$dd.latex2html-init";
    require(".$dd.latex2html-init");
}
die "'.' is an incorrect setting for DESTDIR.\n" .
    "Please check your .latex2html-init file.\n"
    if ($DESTDIR eq '.');

# User home substitutions
$LATEX2HTMLSTYLES =~ s/~([$dd:]|$)/$HOME$1/g;
$LATEX2HTMLSTYLES =~ s/~([^$dd:]+)/(getpwnam($1))[7]/ge;

#HWS: That was the last reference to HOME. Now set HOME to $LATEX2HTMLDIR,
#     to enable dvips to see that version of .dvipsrc! But only if we
#     have DVIPS_MODE not set - yes - this is a horrible nasty kludge
if ($PK_GENERATION && ! $DVIPS_MODE) {
    $ENV{HOME} = "$LATEX2HTMLDIR";
    delete $ENV{PRINTER}; # Overrides .dvipsrc
}

# Process switches
# Every leading argument that starts with '-' is consumed here; switches
# that take a value shift that value off @ARGV as well.  Whatever is left
# in @ARGV afterwards is the list of files handed to &driver.
$argv = join(' ',@ARGV); # Save the command line arguments
while ($ARGV[0] =~ /^-/) {
    $_ = shift;
    if (/^-split$/) {
        $_ = shift;
        # A leading '+' is recorded by storing the depth negated; the sign
        # is undone (and $REL_DEPTH set) after the switch loop.
        if (/^(\+?)(\d+)$/) {
            $MAX_SPLIT_DEPTH = $2;
            if ($1) { $MAX_SPLIT_DEPTH *= -1; $REL_DEPTH = 1 }
        } else {
            print("Unrecognised value for -split: $_\n") && &usage && die };
    } elsif (/^-link$/) {
        $_ = shift;
        if (/^(\+?)(\d+)$/) {
            $MAX_LINK_DEPTH = $2;
            if ($1) { $MAX_LINK_DEPTH *= -1 }
        } else {
            print("Unrecognised value for -link: $_\n") && &usage && die };
#       ((($MAX_LINK_DEPTH) = /^(\d+)$/)
#        || print("Unrecognised value for -link: $_\n")
#        && &usage && die);
    } elsif (/^-toc_depth$/) {
        $_ = shift;
        ((($TOC_DEPTH) = /^(\d+)$/)
         || print("Unrecognised value for -toc_depth: $_\n")
         && &usage && die);
    } elsif (/^-toc_stars$/) {
        $TOC_STARS = 1;
    } elsif (/^-short_extn$/) {
        $SHORTEXTN = 1;
    } elsif (/^-iso_language$/) {
        $ISO_LANGUAGE = shift;
        &usage && die "Language must be uppercase and dots only: $ISO_LANGUAGE"
            unless ($ISO_LANGUAGE =~ /^[A-Z\.]+$/);
    } elsif (/^-nolatex$/) {
        $NOLATEX = 1;
    } elsif (/^-no_fork$/) {
        $NOFORK = 1;
    } elsif (/^-external_images$/) {
        $EXTERNAL_IMAGES = 1;
    } elsif (/^-ascii_mode$/) {
        $ASCII_MODE = 1;
        $EXTERNAL_IMAGES = 1;
    } elsif (/^-ps_images$/) {
        $PS_IMAGES = 1;
        $EXTERNAL_IMAGES = 1;
    } elsif (/^-font_size$/) {
        $FONT_SIZE = shift;
        &usage && die "Font size must end with \"pt\": $FONT_SIZE"
            unless ($FONT_SIZE =~ /^\d*pt$/);
    } elsif (/^-no_tex_defs$/) {
        $TEXDEFS = 0;
    } elsif (/^-no_navigation$/) {
        $NO_NAVIGATION = 1;
    } elsif (/^-top_navigation$/) {
        $TOP_NAVIGATION = 1;
    } elsif (/^-bottom_navigation$/) {
        $BOTTOM_NAVIGATION = 1;
    } elsif (/^-auto_navigation$/) {
        $AUTO_NAVIGATION = 1;
    } elsif (/^-index_in_navigation$/) {
        $INDEX_IN_NAVIGATION = 1;
    } elsif (/^-contents_in_navigation$/) {
        $CONTENTS_IN_NAVIGATION = 1;
    } elsif (/^-next_page_in_navigation$/) {
        $NEXT_PAGE_IN_NAVIGATION = 1;
    } elsif (/^-previous_page_in_navigation$/) {
        $PREVIOUS_PAGE_IN_NAVIGATION = 1;
    } elsif (/^-no_footnode$/) {
        $NO_FOOTNODE = 1;
    } elsif (/^-numbered_footnotes$/) {
        $NUMBERED_FOOTNOTES = 1;
    } elsif (/^-prefix$/) {
        $PREFIX = shift;
    } elsif (/^-auto_prefix$/) {
        $AUTO_PREFIX = 1;
    } elsif (/^-long_titles$/) {
        $_ = shift;
        $LONG_TITLES = $_;
        ((($LONG_TITLES) = /^(\d+)$/)
         || print("max words for -long_titles must be integer: $_\n")
         && &usage && die);
    } elsif (/^-custom_titles$/) {
        $CUSTOM_TITLES = 1;
    } elsif (/^-t$/) {
        $_ = shift;
        ((($TITLE) = /^(.+)$/)
         || print("No title for -t? $_\n")
         && &usage && die);
    } elsif (/^-dir$/) {
        $_ = shift;
        $DESTDIR = $_;
        &usage && die unless ($_);
    } elsif (/^-address$/) {
        $ADDRESS = shift;
    } elsif (/^-no_subdir$/) {
        $NO_SUBDIR = 1;
    } elsif (/^-info$/) {
        $_ = shift;
        ((($INFO) = /^(.+)$/)
         || print("No string for -info: Will not generate information page.\n") );
    } elsif (/^-no_auto_link/) {
        $NO_AUTO_LINK = 1;
    } elsif (/^-reuse/) {
        $REUSE = shift;
    } elsif (/^-no_reuse/) {
        $REUSE = 0;
    # The longer *_text switches must be tested before their prefixes,
    # because these patterns are not anchored at the end.
    } elsif (/^-antialias_text/) {
        $ANTI_ALIAS_TEXT = 1;
    } elsif (/^-no_antialias_text/) {
        $ANTI_ALIAS_TEXT = 0;
    } elsif (/^-antialias/) {
        $ANTI_ALIAS = 1;
    } elsif (/^-no_antialias/) {
        $ANTI_ALIAS = 0;
    } elsif (/^-white/) {
        $WHITE_BACKGROUND = 1;
    } elsif (/^-no_white/) {
        $WHITE_BACKGROUND = '';
    } elsif (/^-discard/) {
        $DISCARD_PS = 1;
    } elsif (/^-no_images/) {
        $NO_IMAGES = 1;
    } elsif (/^-accent_images/) {
        $_ = shift;
        $ACCENT_IMAGES = $_;
        ((($ACCENT_IMAGES) = /^([a-zA-Z,]+)$/)
         || print("single word or comma-list of style words needed for -accent_images , no \\ : $_\n")
         && die);
    } elsif (/^-no_accent_images/) {
        $ACCENT_IMAGES = '';
    } elsif (/^-no_math$/) {
        $NO_SIMPLE_MATH = 1;
    } elsif (/^-no_latin$/) {
        $NO_ISOLATIN = 1;
    } elsif (/^-local_icons/) {
        $LOCAL_ICONS = 1;
    } elsif (/^-scalable_fonts/) {
        $SCALABLE_FONTS = 1;
    } elsif (/^-images_only/) {
        $IMAGES_ONLY = 1;
    } elsif (/^-show_section_numbers/) {
        $SHOW_SECTION_NUMBERS = 1;
    } elsif (/^-init_file/) {
        $init_file = shift;
#       require($init_file) if (-f $init_file);
        if (-f $init_file) {
            print "\ninitialising with file: $init_file"
                if (($DEBUG)||($VERBOSITY));
            require($init_file);
        } else {
            print "\nCould not find file: $init_file";
        }
    # In the URL-valued switches a literal `~' is replaced by its numeric
    # character reference.  (The replacement text had decayed to a bare
    # `~', which made the substitution a no-op.)
    } elsif ( /^-up_url$/ ) {
        $EXTERNAL_UP_LINK = shift;
        $EXTERNAL_UP_LINK =~ s/~/&#126;/g; # protect `~'
    } elsif ( /^-down_title$/ ) {
        $EXTERNAL_DOWN_TITLE = shift;
    } elsif ( /^-down_url$/ ) {
        $EXTERNAL_DOWN_LINK = shift;
        $EXTERNAL_DOWN_LINK =~ s/~/&#126;/g; # protect `~'
    } elsif ( /^-up_title$/ ) {
        $EXTERNAL_UP_TITLE = shift;
    } elsif ( /^-prev_url$/ ) {
        $EXTERNAL_PREV_LINK = shift;
        $EXTERNAL_PREV_LINK =~ s/~/&#126;/g; # protect `~'
    } elsif ( /^-prev_title$/ ) {
        $EXTERNAL_PREV_TITLE = shift;
    } elsif ( /^-index$/ ) {
        $EXTERNAL_INDEX = shift;
        $EXTERNAL_INDEX =~ s/~/&#126;/g; # protect `~'
    } elsif ( /^-biblio$/ ) {
        $EXTERNAL_BIBLIO = shift;
        $EXTERNAL_BIBLIO =~ s/~/&#126;/g; # protect `~'
    } elsif ( /^-contents$/ ) {
        $EXTERNAL_CONTENTS = shift;
        $EXTERNAL_CONTENTS =~ s/~/&#126;/g; # protect `~'
    } elsif ( /^-external_file$/ ) {
        $EXTERNAL_FILE = shift;
    } elsif (/^-short_index/) {
        $SHORT_INDEX = 1;
    } elsif (/^-unsegment/) {
        $UNSEGMENT = 1;
    } elsif (/^-debug/) {
        $DEBUG = 1;
    } elsif (/^-tmp/) {
        $TMP = shift;
        $TMP = "${dd}tmp" unless $TMP;
    } elsif (/^-ldump/) {
        $LATEX_DUMP = 1;
    } elsif (/^-timing/) {
        $TIMING = 1;
    } elsif (/^-verbosity/) {
        $_ = shift;
        ((($VERBOSITY) = /^(\d+)$/)
         || print("invalid verbosity level: $_, must be a number\n")
         && &usage && die);
    } elsif (/^-h(elp)?$/) {
        &usage;
    } elsif (/^-v/) {
        print "$TEX2HTMLV_SHORT\n";
        exit 0;
    } ###MEH
    elsif (/^-html_version$/) {
        $_ = shift;
        $HTML_VERSION = $_;
#       if ( /^\d(\.\d)?$/ ) {
#           $HTML_VERSION = $_;
#       } else {
#           print("Invalid HTML version, defaulting to $HTML_VERSION\n");
#       };
    } elsif (/^-strict$/) {
        $STRICT_HTML = 1;
    } elsif (/^-no_strict$/) {
        $STRICT_HTML = 0;
    } else {
        &usage;
        die "Unrecognised switch: $_\n";
    }
}

# An external "up" or "down" link needs both its URL and its title;
# when only one of the pair was given, both are dropped.
if ( $EXTERNAL_UP_TITLE || $EXTERNAL_UP_LINK ) {
    if ( ! $EXTERNAL_UP_TITLE || !$EXTERNAL_UP_LINK ) {
        print STDERR "Need to specify both a parent URL and a parent title!\n";
        $EXTERNAL_UP_TITLE = $EXTERNAL_UP_LINK = "";
    };
};
if ( $EXTERNAL_DOWN_TITLE || $EXTERNAL_DOWN_LINK ) {
    if ( ! $EXTERNAL_DOWN_TITLE || !$EXTERNAL_DOWN_LINK ) {
        # This diagnostic used to repeat the "parent" wording of the
        # -up_* check although it concerns -down_url / -down_title.
        print STDERR "Need to specify both a child URL and a child title!\n";
        $EXTERNAL_DOWN_TITLE = $EXTERNAL_DOWN_LINK = "";
    };
};

# $NO_NAVIGATION = 1 unless $MAX_SPLIT_DEPTH; # Martin Wilck

# Negative depths are the encoding of a '+' prefix (they may also come
# from a configuration file): normalise and set the matching flag.
if (($MAX_SPLIT_DEPTH) && ($MAX_SPLIT_DEPTH < 0 )) {
    $MAX_SPLIT_DEPTH *= -1; $REL_DEPTH = 1 }
if (($MAX_LINK_DEPTH) && ($MAX_LINK_DEPTH < 0 )) {
    $MAX_LINK_DEPTH *= -1; $LEAF_LINKS = 1 }

$NO_FOOTNODE = 1 unless ($MAX_SPLIT_DEPTH || $NO_FOOTNODE);
$NO_SPLIT = 1 unless $MAX_SPLIT_DEPTH;
$SEGMENT = $SEGMENTED = 0;
$NO_MATH_MARKUP = 1;

if ($SHORTEXTN) { $EXTN = ".htm"; } else { $EXTN = ".html"; }
$TMP_ = "TMP$dd"; # different to the $TMPDIR for image-generation
if ($TMP) { $TMP .= $dd unless ($TMP =~ /$dd$/)}
$TMP_PREFIX = "l2h" unless ($TMP_PREFIX);

#check that $TMP is writable, if so create a subdirectory
#
# Sets the globals $DESTDIR (made absolute, with a trailing $dd) and
# $TMPDIR (where images are generated), and appends " -tmp $TMPDIR " to
# $PSTOIMG unless it already carries a -tmp switch.  Dies when $TMPDIR
# contains a `.' and $DVIPS names dvips.  Takes no arguments.
sub make_tmp_dir {
    $dvips_warning = join("\n" ,
        "\n*** there is a `.' in \$TMPDIR ; $DVIPS will fail"
        , " Set \$TMP to use a /tmp directory, or rename the working directory.\n" );
    if ($DESTDIR) { $DESTDIR = &make_directory_absolute($DESTDIR) }
    else { $DESTDIR = join('', &getcwd, $dd) }
    $DESTDIR .= $dd unless ($DESTDIR =~ /$dd$/);
    $TMPDIR = $DESTDIR . $TMP_PREFIX.$$.$dd;

    # No $TMP requested: try a per-process subdirectory of the current
    # directory and fall back to $DESTDIR itself when mkdir fails.
    do {
        $TMPDIR = $DESTDIR unless mkdir("$TMP_PREFIX$$", oct(755));
        print STDERR "\nimages will be generated in $TMPDIR \n";
        $PSTOIMG .= " -tmp $TMPDIR " if (($TMPDIR)&&!($PSTOIMG =~ /-tmp\b/));
        die ($dvips_warning . "\n\$TMPDIR=$TMPDIR ***\n\n")
            if (($TMPDIR =~ /\./)&&($DVIPS =~ /dvips/));
        return ;
    } unless ($TMP);

    $TMP .= $dd unless ($TMP =~ /$dd$/);
    # Writability probe: the current directory name is written to a
    # scratch file under $TMP and read back.  Note that the write goes
    # through the STDOUT handle itself, which is closed afterwards;
    # output is switched to STDERR at the end of this sub.
    open(STDOUT, ">${TMP}foo$$");
    print STDOUT &getcwd;
    close(STDOUT);
    open(FOO,"<${TMP}foo$$");
    # Read the probe back (equivalent to the angle-bracket read on FOO,
    # which had been lost from this statement).
    $DESTDIR = readline(*FOO);
    close(FOO);
    unlink "${TMP}foo$$";
    if ($DESTDIR) {
        $DESTDIR .= $dd;
        chdir ("$TMP");
        $TMPDIR = &getcwd;
        $TMPDIR .= $dd.$TMP_PREFIX.$$.$dd
            if mkdir("$TMP_PREFIX$$", oct(755));
        chdir ("$DESTDIR");
    } else {
        print STDERR "\ncould not use $TMP directory ";
        $DESTDIR = join('', &getcwd, $dd);
    }
    die ($dvips_warning . "\n\$TMPDIR=$TMPDIR ***\n\n")
        if (($TMPDIR =~ /\./)&&($DVIPS =~ /dvips/));
    do {
        $TMPDIR = $DESTDIR.$TMP_PREFIX.$$.$dd
            if mkdir("$TMP_PREFIX$$", oct(755));
    } unless $TMPDIR;
    $PSTOIMG .= " -tmp $TMPDIR " if (($TMPDIR)&&!($PSTOIMG =~ /-tmp\b/));
    select(STDERR); $| = 1;
    print "\nworking directory is $DESTDIR ";
    print "\nimages will be generated in $TMPDIR \n";
}
#if (!(($TMP)&&($TMPDIR))) { $TMP = ''; $TMPDIR ='' }

$CHARSET = $charset unless ($CHARSET);
#$CHARSET = "iso_8859_1" unless ($CHARSET);
$CHARSET = "iso-8859-1" unless ($CHARSET);

####################################################################
#
# Figure out what options we need to pass to DVIPS and store that in
# the $DVIPSOPT variable. Also, scaling is taken care of at the
# dvips level if PK_GENERATION is set to 1, so adjust SCALE_FACTORs
# accordingly.
#
if ($SCALABLE_FONTS) {
    $PK_GENERATION = 0;
    $DVIPS_MODE = '';
}

if ($PK_GENERATION) {
    if ($MATH_SCALE_FACTOR <= 0) { $MATH_SCALE_FACTOR = 2; }
    if ($FIGURE_SCALE_FACTOR <= 0) { $FIGURE_SCALE_FACTOR = 2; }
    $saveMSF = $MATH_SCALE_FACTOR;
    $saveFSF = $FIGURE_SCALE_FACTOR;
    $desired_dpi = int($MATH_SCALE_FACTOR*75);
    $FIGURE_SCALE_FACTOR = ($METAFONT_DPI / 72) *
        ($FIGURE_SCALE_FACTOR / $MATH_SCALE_FACTOR) ;
    $MATH_SCALE_FACTOR = $METAFONT_DPI / 72;
    $dvi_mag = int(1000 * $desired_dpi / $METAFONT_DPI);
    $mode_switch = "-mode $DVIPS_MODE" if $DVIPS_MODE;
    if ($dvi_mag > 1000) {
        &write_warnings(
            "WARNING: Your SCALE FACTOR is too large for PK_GENERATION.\n" .
            " See latex2html.config for more information.\n");
    }

    # RRM: over-sized scaling, using dvi-magnification
    if ($EXTRA_IMAGE_SCALE) {
        print "\n *** Images at $EXTRA_IMAGE_SCALE times resolution of displayed size ***\n";
        $desired_dpi = int($EXTRA_IMAGE_SCALE * $desired_dpi+.5);
        print " desired_dpi = $desired_dpi METAFONT_DPI = $METAFONT_DPI\n" if $DEBUG;
        $dvi_mag = int(1000 * $desired_dpi / $METAFONT_DPI);
        $MATH_SCALE_FACTOR = $saveMSF;
        $FIGURE_SCALE_FACTOR = $saveFSF;
    }
    $DVIPSOPT = " -y $dvi_mag -D $METAFONT_DPI $mode_switch -e 5 ";
} else {
#    if ($EXTRA_IMAGE_SCALE) {
#       &write_warnings(
#          "the \$EXTRA_IMAGE_SCALE feature requires either \$PK_GENERATION=1"
#                       . " or the '-scalable_fonts' option");
#       print "\n*** the \$EXTRA_IMAGE_SCALE feature requires \$PK_GENERATION=1"
#                       . " or the '-scalable_fonts' option ***\n";
#       $EXTRA_IMAGE_SCALE = '';
#    }
    $DVIPSOPT = ' -M ';
}

# The mapping from numbers to accents.
# These are required to process the \accent command, which is found in
# tables of contents whenever there is an accented character in a
# caption or section title. Processing the \accent command makes
# $encoded_*_number work properly (see &extract_captions) with
# captions that contain accented characters.
# I got the numbers from the plain.tex file, version 3.141.

# Missing entries should be looked up by a native speaker.
# Have a look at generate_accent_commands and $iso_8859_1_character_map.

# MEH: added more accent types
%accent_type = (
   '18', 'grave',               # \`
   '19', 'acute',               # `'
   '20', 'caron',               # \v
   '21', 'breve',               # \u
   '22', 'macr',                # \=
   '23', 'ring',                #
   '24', 'cedil',               # \c
   '94', 'circ',                # \^
   '95', 'dot',                 # \.
   '7D', 'dblac',               # \H
   '7d', 'dblac',               # \H
   '7E', 'tilde',               # \~
   '7e', 'tilde',               # \~
   '7F', 'uml',                 # \"
   '7f', 'uml',                 # \"
);

&driver;

# Process each file ...
#
# For every remaining element of @ARGV: resolve the source file, prepare
# the destination directory and its TMP_ scratch directory, run
# $TEXEXPAND on the source, open the DBM state, translate the document
# (or only rebuild images when $IMAGES_ONLY is set), report collected
# warnings and return to the starting directory.  Returns $_.
sub driver {
    local($FILE, $orig_cwd, %unknown_commands, %dependent, %depends_on
          , %styleID, %env_style, $bbl_cnt, $dbg, %numbered_section);
    # MRO: $texfilepath has to be global!
    local(%styles_loaded);
    $orig_cwd = &getcwd;
    print "\n *** initialise *** " if ($VERBOSITY > 1);
    &initialise;                # Initialise some global variables
    print "\n *** check modes *** " if ($VERBOSITY > 1);
    &ascii_mode if $ASCII_MODE; # Must come after initialization
    &titles_language($TITLES_LANGUAGE);
    &make_numbered_footnotes if ($NUMBERED_FOOTNOTES);
    $dbg = $DEBUG ? "-debug" : "";
    $dbg .= (($VERBOSITY>2) ? " -verbose" : "");

    print "\n *** files: ".join(',',@ARGV)." *** " if ($VERBOSITY > 1);
    foreach $FILE (@ARGV) {
        local($bbl_nr) = 1;
        # The number of reused images and those in images.tex
        local($global_page_num) = (0);
        # The number of images in images.tex
        local($new_page_num) = (0);
        local($pid, $sections_rx,
              $outermost_level, %cached_env_img, %id_map,
              %latex_body, $latex_body, %symbolic_labels, %latex_labels,
              %encoded_section_number, %verbatim, %new_command,
              %new_environment, %provide_command, %renew_command
              ,$preamble, $aux_preamble, $prelatex, @preamble);

        ## AYS: Allow extension other than .tex and make it optional
        ($EXT = $FILE) =~ s/.*\.([^\.]*)$/$1/;
        if ( $EXT eq $FILE ) {
            $EXT = "tex";
            $FILE =~ s/$/.tex/;
        }
        ($texfilepath, $FILE) = &get_full_path($FILE);
        die "Cannot read $texfilepath$dd$FILE \n"
            unless (-f "$texfilepath$dd$FILE");

        # Tell texexpand which files we *don't* want to look at.
        $ENV{'TEXE_DONT_INCLUDE'} = $DONT_INCLUDE if $DONT_INCLUDE;
        # Tell texexpand which files we *do* want to look at, e.g.
        # home-brew style files
        $ENV{'TEXE_DO_INCLUDE'} = $DO_INCLUDE if $DO_INCLUDE;

        $FILE =~ s/\.[^\.]*$//; ## AYS
        $DESTDIR = $FILE unless $DESTDIR;
        $PREFIX = "$FILE-" if $AUTO_PREFIX;
        $DESTDIR = "." if $NO_SUBDIR;
        print "OPENING $texfilepath$dd$FILE.$EXT \n"; ## AYS

        next unless &new_dir($DESTDIR);
        # Need to clean up a bit in case there's garbage left
        # from former runs.
        chdir($DESTDIR) || die "$!\n";
        if (opendir (TMP,".$dd$TMP_")) {
            local(@files) = readdir TMP;
            foreach (@files) { unlink ".$dd$TMP_$_" unless (/^\./) }
            closedir TMP;
        }
        &cleanup(1);
        # (Re)create the TMP_ scratch directory; mkdir wants the name
        # without its trailing delimiter.
        do {local($tmp)=$TMP_; $tmp =~ s/$dd$//;
            mkdir($tmp, oct(755)); undef $tmp;};
        chdir($orig_cwd);

        # JCL(jcl-dir)
        # We need absolute paths for TEXINPUTS here, because
        # we change the directory
        &deal_with_texinputs($orig_cwd, $texfilepath);

        # This needs $DESTDIR to have been created ...
        print " *** calling `texexpand' ***" if ($VERBOSITY > 1);
        local($unseg) = ($UNSEGMENT ? "-unsegment" : "");
        &syswait("$TEXEXPAND $dbg -auto_exclude $unseg "
                 . "-save_styles $DESTDIR${dd}${TMP_}styles "
                 . "$texfilepath$dd$FILE.$EXT > $DESTDIR${dd}$TMP_$FILE")
            && print "Error: $!\n";
        print STDERR "\n *** `texexpand' done ***\n";

        chdir($DESTDIR);
        $SIG{'INT'} = 'handler';

        &open_dbm_database;
        &initialise_sections;
        print STDERR "\n *** database open ***\n";

        if ($IMAGES_ONLY) {
            &make_off_line_images}
        else {
            &rename_image_files;
            &load_style_file_translations;
            &make_language_rx;
            &make_raw_arg_cmd_rx;
#           &make_isolatin1_rx unless ($NO_ISOLATIN);
            &translate_titles;
            print "\nReading ...";
            &slurp_input_and_partition_and_pre_process("$TMP_$FILE");
            &add_preamble_head;
            # Create a regular expressions
            &set_depth_levels;
            &make_sections_rx;
            &make_order_sensitive_rx;
            &add_document_info_page if (($INFO) && !(/\\htmlinfo/));
            &add_bbl_and_idx_dummy_commands;
            &translate;         # Destructive!
        }
        &cleanup;
        &style_sheet;
        #JCL: read warnings from file to $warnings
        local($warnings) = &get_warnings;
        print "\n\n*********** WARNINGS *********** \n$warnings"
            if ($warnings || $NO_IMAGES || $IMAGES_ONLY);
        &image_cache_message if ($NO_IMAGES || $IMAGES_ONLY);
        &image_message if ($warnings =~ /Failed to convert/io);
        undef $warnings;

        # JCL - generate directory index entry.
        # Yet, a hard link, cause Perl lacks symlink() on some systems.
        do {
            local($from,$to) = (eval($LINKPOINT),eval($LINKNAME));
            if (length($from) && length($to) && ($from ne $to)) {
                unlink($to);
                link($from,$to);
            }
        } unless ($NO_AUTO_LINK || !($LINKPOINT) || !($LINKNAME));
        # Go back to the source directory
        chdir($orig_cwd);
    }
    print "\nUnknown commands: ". join(" ",keys %unknown_commands)
        if %unknown_commands;
###MEH -- math support
    print "\nMath commands outside math: " .
        join(" ",keys %commands_outside_math) .
            "\n Output may look weird or may be faulty!\n"
                if %commands_outside_math;
    print "\nDone.\n";
    $end_time = time;
    $total_time = $end_time - $start_time;
    print STDERR join(' ',"Timing:",$total_time,"seconds")
        if ($TIMING||$DEBUG||($VERBOSITY > 2));
    $_;
}

# Ties the persistent hashes to DBM files under $TMP_, coerces the stored
# counters to numbers, loads the flat-file tables through &read_mydb and
# finally calls &restore_critical_variables.  Takes no arguments.
sub open_dbm_database {
    # These are DBM (unix DataBase Management) arrays which are actually
    # stored in external files. They are used for communication between
    # the main process and forked child processes;
    print STDERR "\n"; # this mysteriously prevents a core dump !

    dbmopen(%verb, "${TMP_}verb",0755);
    dbmopen(%verb_delim, "${TMP_}verb_delim",0755);
    dbmopen(%expanded,"${TMP_}expanded",0755);
    # Holds max_id, verb_counter, verbatim_counter, eqn_number
    dbmopen(%global, "${TMP_}global",0755);
    # Hold style sheet information
    dbmopen(%env_style, "${TMP_}envstyles",0755);
    dbmopen(%txt_style, "${TMP_}txtstyles",0755);
    dbmopen(%styleID, "${TMP_}styleIDs",0755);

    # These next two are used during off-line image conversion
    # %new_id_map maps image id's to page_numbers of the images in images.tex
    # %image_params maps image_ids to conversion parameters for that image
    dbmopen(%new_id_map, "${TMP_}ID_MAP",0755);
    # Same file name as in &clear_images_dbm_database: this one used to be
    # built from $TMP (the user's tmp directory) instead of $TMP_.
    dbmopen(%img_params, "${TMP_}IMG_PARAMS",0755);
    dbmopen(%orig_name_map, "${TMP_}ORIG_MAP",0755);

    # `| 0' forces a numeric value even when the key is not stored yet.
    $global{'max_id'} = ($global{'max_id'} | 0);
    &read_mydb(*verbatim, "verbatim");
    $global{'verb_counter'} = ($global{'verb_counter'} | 0);
    $global{'verbatim_counter'} = ($global{'verbatim_counter'} | 0);

    &read_mydb(*new_command, "new_command");
    &read_mydb(*renew_command, "renew_command");
    &read_mydb(*provide_command, "provide_command");
    &read_mydb(*new_theorem, "new_theorem");
    &read_mydb(*new_environment, "new_environment");
    &read_mydb(*dependent, "dependent");
#    &read_mydb(*env_style, "env_style");
#    &read_mydb(*styleID, "styleID");
    $preamble = &read_mydb(*preamble, "preamble");
    $prelatex = &read_mydb(*prelatex, "prelatex");
    $aux_preamble = &read_mydb(*aux_preamble, "aux_preamble");
    &restore_critical_variables;
}

# Saves the critical variables into %global, then closes and clears every
# hash that &open_dbm_database tied.
sub close_dbm_database {
    &save_critical_variables;
    dbmclose(%verb); undef %verb;
    dbmclose(%verb_delim); undef %verb_delim;
    dbmclose(%expanded); undef %expanded;
    dbmclose(%global); undef %global;
    dbmclose(%env_style); undef %env_style;
    # %txt_style is opened by &open_dbm_database but was never closed here.
    dbmclose(%txt_style); undef %txt_style;
    # The hash that is opened is %styleID; %style_id was a misspelling
    # that left the real one tied.
    dbmclose(%styleID); undef %styleID;
    dbmclose(%new_id_map); undef %new_id_map;
    dbmclose(%img_params); undef %img_params;
    dbmclose(%orig_name_map); undef %orig_name_map;
}

# Closes, clears and re-opens the three image-related DBM hashes.
sub clear_images_dbm_database {
    # <Added calls to dbmclose dprhws>
    # %new_id_map will be used by the off-line image conversion process
    #
    dbmclose(%new_id_map);
    dbmclose(%img_params);
    dbmclose(%orig_name_map);
    undef %new_id_map;
    undef %img_params;
    undef %orig_name_map;
    dbmopen(%new_id_map, "${TMP_}ID_MAP",0755);
    dbmopen(%img_params, "${TMP_}IMG_PARAMS",0755);
    dbmopen(%orig_name_map, "${TMP_}ORIG_MAP",0755);
}

# Copies every entry of %numbered_section into %global.
sub initialise_sections {
    local($key);
    foreach $key (keys %numbered_section) {
        $global{$key} = $numbered_section{$key}}
}

# Stores the translation state that must survive between processes in
# %global.  Counterpart of &restore_critical_variables.
sub save_critical_variables{
    $global{'math_markup'} = $NO_MATH_MARKUP;
    $global{'charset'} = $CHARSET;
    $global{'charenc'} = $charset;
    $global{'language'} = $default_language;
    $global{'isolatin'} = $ISOLATIN_CHARS;
    $global{'unicode'} = $UNICODE_CHARS;
    if ($UNFINISHED_ENV) {
        $global{'unfinished_env'} = $UNFINISHED_ENV;
        $global{'replace_end_env'} = $REPLACE_END_ENV;
    }
    $global{'unfinished_comment'} = $UNFINISHED_COMMENT;
    if (@UNMATCHED_OPENING) {
        $global{'unmatched'} = join(',',@UNMATCHED_OPENING);
    }
}

# Reloads the state written by &save_critical_variables from %global and
# undefines the do_cmd_* sub of every key in %renew_command.
#
# NOTE(review): the merges below use the bitwise `|' operator.  With two
# non-empty strings that differ (e.g. two charset names) this ORs them
# character by character instead of choosing one; `||' was presumably
# intended.  Left unchanged here -- confirm before altering.
sub restore_critical_variables{
    $NO_MATH_MARKUP = ($global{'math_markup'}|
        (defined $NO_MATH_MARKUP ? $NO_MATH_MARKUP:1));
    $CHARSET = ($global{'charset'}| $CHARSET);
    $charset = ($global{'charenc'}| $charset);
    $default_language = ($global{'language'}|
        (defined $default_language ? $default_language:'english'));
    $ISOLATIN_CHARS = ($global{'isolatin'}|
        (defined $ISOLATIN_CHARS ? $ISOLATIN_CHARS:0));
    $UNICODE_CHARS = ($global{'unicode'}|
        (defined $UNICODE_CHARS ? $UNICODE_CHARS:0));
    if ($global{'unfinished_env'}) {
        $UNFINISHED_ENV = $global{'unfinished_env'};
        $REPLACE_END_ENV = $global{'replace_end_env'};
    }
    $UNFINISHED_COMMENT = $global{'unfinished_comment'};
    if ($global{'unmatched'}) {
        @UNMATCHED_OPENING = split(',',$global{'unmatched'});
    }

    # undef any renewed-commands...
    # so the new defs are read from %new_command
    local($cmd,$key,$code);
    foreach $key (keys %renew_command) {
        $cmd = "do_cmd_$key";
        $code = "undef \&$cmd"; eval($code) if (defined &$cmd);
        if ($@) { print "\nundef \&do_cmd_$cmd failed"}
    }
}

#JCL: The warnings should have been handled within the DBM database.
# Unfortunately if the contents of an array are more than ~900 (system
# dependent) chars long then dbm cannot handle it and gives error messages.

# Appends the given text to the file WARNINGS in the current directory
# and echoes it to STDERR when $VERBOSITY exceeds 1.
sub write_warnings {
    local($_) = @_;
    open(DB,">>WARNINGS"); #file name mustn't start with TMP_
    print DB $_;
    print STDERR "\n *** Warning: $_\n" if ($VERBOSITY > 1);
    close DB;
}

# Returns the accumulated contents of the WARNINGS file as one string and
# deletes the file; returns nothing when the file does not exist.
sub get_warnings {
    local($_);
    return unless (-f "WARNINGS");
#    $_ = `cat WARNINGS`;
    open CATFILE, "WARNINGS";
    # List-context read of every line (equivalent to the angle-bracket
    # read on CATFILE, which had been lost from this statement).
    local(@the_warnings) = readline(*CATFILE);
    close CATFILE;
    unlink("WARNINGS");
    join('', @the_warnings);
#    $_;
}

# Records a warning that the named command received an incomplete argument.
sub lost_argument {
    local($cmd) = @_;
    &write_warnings("\nincomplete argument to command: \\$cmd");
}

# These three subroutines should have been handled within the DBM database.
# Unfortunately if the contents of an array are more than ~900 (system
# dependent) chars long then dbm cannot handle it and gives error messages.
# So here we save and then read the contents explicitly.
# Appends one record to the flat file "$TMP_$db".
#   $db   - table name (file name below $TMP_)
#   $key  - record key; it is delimited with '#', so it must not contain '#'
#   $_    - record contents
# A record is written as "\n" . $mydb_mark . "#key#" . contents, which is
# the layout &read_mydb splits on.  Reports on STDERR when the file
# cannot be opened.
sub write_mydb {
    local($db, $key, $_) = @_;
    unless (open(DB, ">>${TMP_}${db}")) {
        print STDERR "\n*** cannot append to ${TMP_}${db}: $!\n";
        return;
    }
    print DB join('', "\n$mydb_mark","#", $key, "#", $_);
    close DB;
}

# Replaces the whole contents of "$TMP_$db" with the given text.
sub write_mydb_simple {
    local($db, $_) = @_;
    unless (open(DB, ">${TMP_}${db}")) {
        print STDERR "\n*** cannot write ${TMP_}${db}: $!\n";
        return;
    }
    print DB $_;
    close DB;
}

# Truncates "$TMP_$db" to an empty file (creating it when missing).
sub clear_mydb {
    local($db) = @_;
    unless (open(DB, ">${TMP_}${db}")) {
        print STDERR "\n*** cannot clear ${TMP_}${db}: $!\n";
        return;
    }
    close DB;
}

# Assumes the existence of a file TMP_verbatim which contains
# sequences of verbatim counters and verbatim contents.
#
#   *db    - typeglob of the hash to fill, e.g. *verbatim
#   $name  - table name (file name below $TMP_)
# Loads every record written by &write_mydb into %db and returns the raw
# file contents.  Returns nothing when the file does not exist.
sub read_mydb {
    local(*db,$name) = @_;
    local($_,@tmp,$i,$tmp1,$tmp2);
    return unless (-f "$TMP_$name");
#    $_ = `cat $TMP_$name`;
    open CATFILE, "$TMP_$name";
    # List-context read of every line (equivalent to the angle-bracket
    # read on CATFILE, which had been lost from this statement).
    local(@catfile) = readline(*CATFILE);
    close CATFILE;
    $_ = join('',@catfile); undef @catfile;
    $| = 1;
    # The capturing group makes split return the keys as well:
    # (leading text, key, contents, key, contents, ...)
    @tmp = split(/\n$mydb_mark#([^#]*)#/);
    $i = 1;     # Ignore the first element at 0
    print "\nDBM: $name open..." if ($VERBOSITY > 2);
    while ($i < scalar(@tmp)) {
        $tmp1 = $tmp[$i];
        $tmp2 = $tmp[++$i];
        $db{$tmp1} = $tmp2;
        ++$i;
    };
    undef @tmp;
    $_;
}

# Reads in a latex generated file (e.g. .bbl or .aux)
# It returns success or failure
# ****** and binds $_ in the caller as a side-effect ******
#
#   $ext - extension of the auxiliary file; the base name is
#          $EXTERNAL_FILE when set, otherwise $FILE.
# The file is slurped and pre-processed, then translated in one of three
# ways: line by line for "aux", line by line for extensions matching
# $caption_suffixes, or as a whole for anything else.
sub process_ext_file {
    local($ext) = @_;
    local($found, $extfile,$dum);
    $extfile = $FILE;
    $extfile = $EXTERNAL_FILE if $EXTERNAL_FILE;
    local($file) = &fulltexpath("$extfile.$ext");
    $found = 0;
    &write_warnings(
            "\n$extfile.$EXT is newer than $extfile.$ext: Please rerun latex" . ## AYS
            (($ext =~ /bbl/) ? " and bibtex.\n" : ".\n"))
        if ( ($found = (-f $file)) &&
            &newer(&fulltexpath("$extfile.$EXT"), $file)); ## AYS
    # NOTE(review): in this fallback $file is compared against itself, so
    # the warning can only fire if &newer reports a file as newer than
    # itself -- confirm the intended operands.
    if ((!$found)&&($extfile =~ /\.$EXT$/)) {
        $file = &fulltexpath("$extfile");
        &write_warnings(
            "\n$extfile is newer than $extfile: Please rerun latex" . ## AYS
            (($ext =~ /bbl/) ? " and bibtex.\n" : ".\n"))
            if ( ($found = (-f $file)) &&
                &newer(&fulltexpath("$extfile"), $file)); ## AYS
    }
    if ( $found ) {
        print "\nReading $file ...";
        # must allow @ within control-sequence names
        $dum = &do_cmd_makeatletter();
        &slurp_input($file);
        &pre_process;
        &substitute_meta_cmds if (%new_command || %new_environment);
        if ($ext eq "aux") {
            $aux_preamble .= "\\AtBeginDocument{\\input $file }\n";
            local(@extlines) = split ("\n", $_);
            print " translating ".(0+@extlines). " lines " if ($VERBOSITY >1);
            local($eline,$skip_to); #$_ = '';
            foreach $eline (@extlines) {
                # While $skip_to is set, lines are dropped until the one
                # that carries the matching bracket id.
                if ($skip_to) { next unless ($eline =~ s/$O$skip_to$C//) }
                $skip_to = '';
                # remove \index and \label commands, else invalid links may result
                $eline =~ s/\\(index|label)\s*($O\d+$C).*\2//g;
                if ($eline =~ /\\contentsline/) {
                    do { local($_,$save_AUX) = ($eline,$AUX_FILE);
                        $AUX_FILE = 0;
                        &wrap_shorthand_environments;
                        #footnote markers upset the numbering
                        s/\\footnote(mark|text)?//g;
                        $eline = &translate_environments($_);
                        $AUX_FILE = $save_AUX;
                        undef $_
                    };
                } elsif ($eline =~ s/^\\\@input//) {
                    &do_cmd__at_input($eline);
                    $eline = '';
                } elsif ($eline =~ s/^\\\@setckpt$O(\d+)$C//) {
                    $skip_to = $1; next;
                }
#       $eline =~ s/$image_mark#([^#]+)#/print "\nIMAGE:",$img_params{$1};''/e;
#               $_ .= &translate_commands(&translate_environments($eline));
                $_ .= &translate_commands($eline) if $eline;
            }
            undef @extlines;
        } elsif ($ext =~ /$caption_suffixes/) {
            local(@extlines) = split ("\n", $_);
            print " translating ".(0+@extlines). " lines "if ($VERBOSITY >1);
            local($eline); $_ = '';
            foreach $eline (@extlines) {
                # remove \index and \label commands, else invalid links may result
                # (closing marker is $C, as in the .aux branch; this
                # pattern used to interpolate $P instead)
                $eline =~ s/\\(index|label)\s*($O\d+$C).*\2//g;
                if ($eline =~ /\\contentsline/) {
                    do { local($_,$save_PREAMBLE) = ($eline,$PREAMBLE);
                        $PREAMBLE = 0;
                        &wrap_shorthand_environments;
                        $eline = &translate_environments($_);
                        $PREAMBLE = $save_PREAMBLE;
                        undef $_
                    };
                }
                $_ .= &translate_commands($eline);
            }
            undef @extlines;
        } else {
            print " wrapping " if ($VERBOSITY >1);
            &wrap_shorthand_environments;
            $_ = &translate_commands(&translate_environments($_));
            print " translating " if ($VERBOSITY >1);
        }
        print "\n processed size: ".length($_)."\n" if($VERBOSITY>1);
        $dum = &do_cmd_makeatother();
    } else {
        print "\n*** Could not find file: $file ***\n" if ($DEBUG)
    };
    $found;
}

# Prepends "." and the directories given as arguments to $ENV{'TEXINPUTS'}.
# Dies when TEXINPUTS is not set.
sub deal_with_texinputs {
# The dot precedes all, this let's local files override always.
# The dirs we want are given as parameter list.
    die("You must have TEXINPUTS set at least to the TeX library to" .
        " work with LaTeX2HTML") unless $ENV{'TEXINPUTS'};
    # Join with the platform's environment list delimiter ($envkey),
    # falling back to ':' when it is not set.
    local($sep) = ((defined $envkey && length($envkey)) ? $envkey : ':');
    $ENV{'TEXINPUTS'} = join($sep,".",@_,$ENV{'TEXINPUTS'});
}

# Appends a starred sectioning command at $outermost_level, titled
# $info_title and followed by \textohtmlinfopage, to $_.  Consumes two
# fresh bracket ids from $global{'max_id'}.
sub add_document_info_page {
    # Uses $outermost_level
    # Nasty race conditions if the next two are done in parallel
    local($X) = ++$global{'max_id'};
    local($Y) = ++$global{'max_id'};
    ###MEH -- changed for math support: no underscores in commandnames
    $_ = join('', $_, "\\$outermost_level", "*",
              "$O$X$C $O$Y$C $info_title$O$Y$C $O$X$C \n",
              " \\textohtmlinfopage");
}

# For each style file name in TMP_styles (generated by texexpand) look for a
# perl file in $LATEX2HTMLDIR/styles and load it.
# Loads texdefs.perl from the first directory of $LATEX2HTMLSTYLES that has
# one (only if $TEXDEFS is set), then reads "${TMP_}styles" line by line:
# the first word of a line is a style name, the rest are its options.
sub load_style_file_translations {
    local($_, $style, $options, $dir);
    print "\n";
    if ($TEXDEFS) {
        foreach $dir (split(/$envkey/,$LATEX2HTMLSTYLES)) {
            if (-f ($_ = "$dir${dd}texdefs.perl")) {
                print "Loading $_...\n";
                require ($_);
                $styles_loaded{'texdefs'} = 1;
                last;
            }
        }
    }
    open(STYLES, "<${TMP_}styles");
    # The readline on STYLES was missing from the loop condition.
    while(<STYLES>) {
        if(s/^\s*(\S+)\s*(.*)$/$style = $1; $options = $2;/eo) {
            &do_require_package($style);
            # $DONT_INCLUDE is a ':'-separated list; turn it into an alternation
            $_ = $DONT_INCLUDE;
            s/:/|/g;
            &write_warnings("No implementation found for style \`$style\'\n")
                unless $styles_loaded{$style} || $style =~ /^($_)$/;
            # MRO: Process options for packages
            &do_package_options($style,$options) if($options);
        }
    }
    close(STYLES);
    # packages automatically implemented
    $styles_loaded{'array'} = 1 if ($HTML_VERSION > 3.1);
    $styles_loaded{'theorem'} = 1;
}

################## Weird Special case ##################

# The new texexpand can be told to leave in \input and \include
# commands which contain code that the translator should simply pass
# to latex, such as the psfig stuff. These should still be seen by
# TeX, so we add them to the preamble ...

sub do_include_lines {
    while (s/$include_line_rx//o) {
        local($include_line) = &revert_to_raw_tex($&);
        &add_to_preamble ('include', $include_line);
    }
}

########################## Preprocessing ############################

# JCL(jcl-verb)
# The \verb declaration and the verbatim environment contain simulated
# typed text and should not be processed. Characters such as $,\,{,and }
# lose their special meanings and should not be considered when marking
# brackets etc. To achieve this \verb declarations and the contents of
# verbatim environments are replaced by markers. At the end the original
# text is put back into the document.
# The markers for verb and verbatim are different so that these commands
# can be restored to what the raw input was just in case they need to
# be passed to latex.

sub pre_process {
    # Modifies $_;
    #JKR: We need support for some special environments.
    # This has to be here, because they might contain
    # structuring commands like \section etc.
    local(%comments);
    local($comment_counter) = 0;
    $* = 1;    # Multiline matching ON
    &pre_pre_process if (defined &pre_pre_process);
    &wrap_other_environments if (%other_environments);
    $* = 0;    # Multiline matching OFF
    s/\\\\/\\\\ /go;  # Makes it unnecessary to look for escaped cmds
    &replace_html_special_chars;
    $* = 1;    # Multiline matching ON
    # Remove fake environment which should be invisible to LaTeX2HTML.
    s/%end{latexonly}/\001/go;
    s/%begin{latexonly}([^\001]*)\001/%/go;

    # Move all LaTeX comments into a local list
    s/^([ \t]*%.*)\n/print "%";$comments{++$comment_counter} = "$1";
        "$comment_mark"."$comment_counter\n"/ge;

    # Remove the htmlonly-environment
    s/\\begin{htmlonly}\s*\n?//go;
    s/\\end{htmlonly}\s*\n?//go;
    # Remove enviroments which should be invisible to LaTeX2HTML.
    s/\n[^%\n]*\\end{latexonly}\s*\n?/\001/go;
    s/((^|\n)[^%\n]*)\\begin{latexonly}([^\001]*)\001/$1/go;
    s/\\end{comment}\s*\n?/\001/go;
    s/\\begin{comment}([^\001]*)\001//go;
    $* = 0;    # Multiline matching OFF
#    s/\\\\/\\\\ /go;  # Makes it unnecessary to look for escaped cmds
    local($next, $esc_del);
    &normalize_language_changes;
    # Patches by #JKR, #EI#, #JCL(jcl-verb)

    local(@processedV);
    while (($UNFINISHED_COMMENT)||
        (/\\begin($opt_arg_rx)?\{($verbatim_env_rx|$keepcomments_rx)\}/o)) {
        local($opt, $style_info) = ($1,$2);
        local($before, $contents, $after, $env);
        if ($UNFINISHED_COMMENT) {
            # an environment left open by an earlier part continues here
            $UNFINISHED_COMMENT =~ s/([^:]*)::(\d+)/$env=$1;$after=$_;
                $before = join("",$unfinished_mark,$env,$2,"#");''/e;
            print "\nfound the lost \\end{$env}\n";
        }
        ($before, $after, $env) = ($`, $', $3) unless ($env);
        if (!($before =~
            /\\begin(\s*\[[^\]]*\]\s*)?\{($verbatim_env_rx|$keepcomments_rx)\}/)) {
            push(@processedV,$before);
            print "'";$before = '';
        }
        if ($after =~ /\s*\\end{$env[*]?}/) { # Must NOT use the s///o option!!!
            ($contents, $after) = ($`, $');
            $contents =~ s/^\n+//;
            # re-insert comments
            $contents =~ s/$comment_mark(\d+)/$comments{$1}/g;
            # revert '\\ ' -> '\\' only once
            if ($env =~ /rawhtml|$keepcomments_rx/i) {
                $contents = &revert_to_raw_tex($contents);
            } else {
                $contents =~ s/\\\\ /\\\\/go;
            }
            if ($env =~/$keepcomments_rx/) {
                ++$global{'verbatim_counter'};
                $verbatim{$global{'verbatim_counter'}} = "$contents";
            } else {
                &write_mydb("verbatim", ++$global{'verbatim_counter'}, $contents);
            }
#           $verbatim{$global{'verbatim_counter'}} = "$contents" if ($env =~/$keepcomments_rx/);
#           $verbatim{$global{'verbatim_counter'}} = "$contents";
            if ($env =~ /rawhtml|$keepcomments_rx/i) {
                $after = join("",$verbatim_mark,$env
                              ,$global{'verbatim_counter'},"#",$after);
            } else {
                $after = join("","\\begin", $opt, "\{tex2html_preform\}"
                              , $verbatim_mark,$env
                              , $global{'verbatim_counter'},"#"
                              , "\\end\{tex2html_preform\}",$after);
            }
        } else {
            print "Cannot find \\end{$env}\n";
            $after =~ s/$comment_mark(\d+)/$comments{$1}/g;
            if ($env =~ /rawhtml|$keepcomments_rx/i) {
                # $contents is never set on this path; the text to revert is
                # $after (the original passed $contents and so stored nothing)
                $after = &revert_to_raw_tex($after);
            } else {
                $after =~ s/\\\\ /\\\\/go;
            }
            if ($env =~/$keepcomments_rx/) {
                ++$global{'verbatim_counter'};
                $verbatim{$global{'verbatim_counter'}} = "$after";
            } else {
                &write_mydb("verbatim", ++$global{'verbatim_counter'}, $after );
            }
            $after = join("",$unfinished_mark,$env
                          ,$global{'verbatim_counter'},"#");
        }
        $_ = join("",$before,$after);
    }
    print STDERR " sensitive environments found: ".(0+@processedV)." "
        if((@processedV)&&($VERBOSITY > 1));
    $_ = join('',@processedV, $_);
    undef @processedV;

    # Now do the \verb declarations
    # Patches by: #JKR, #EI#, #JCL(jcl-verb)

    # Tag \verb command and legal opening delimiter with unique number.
    # Replace tagged ones and its contents with $verb_mark & id number if the
    # closing delimiter can be found. After no more \verb's are to tag, revert
    # tagged one's to the original pattern.
    #
    # NOTE(review): the "<verb...>" tag text in the three substitutions, in
    # the delimiter search and in the final revert had been stripped from
    # this copy of the file; it is restored here from what was left of each
    # expression -- confirm against a pristine copy.
    local($del,$contents);
    local($id) = $global{'verb_counter'};
    # must tag only one alternation per loop
    while (s/\\verb(\t*\*\t*)(\S)/"<verb$1".++$id.">$2"/e ||
           s/\\verb()([^a-zA-Z*\s])/"<verb$1".++$id.">$2"/e ||
           s/\\verb(\t\t*)([^*\s])/"<verb$1".++$id.">$2"/e) {
        $del = $2;
        $esc_del = &escape_rx_chars($del);

        # try to find closing delimiter and substitute the complete
        # statement with $verb_mark
        s/<verb[^\d>]*$id>[$esc_del]([^$esc_del]*)[$esc_del]/
            $contents=$1;
            $contents =~ s|\\\\ |\\\\|g;
            $contents =~ s|\n| |g;
            $verb{$id}=$contents;
            $verb_delim{$id}=$del;
            join('',$verb_mark,$id,$verb_mark)/e;
    }
    $global{'verb_counter'} = $id;
    # revert changes to fake verb statements
    s/<verb([^\d>]*)\d+>/\\verb$1/g;

    $* = 1;    # Multiline matching ON
    &preprocess_alltt if defined(&preprocess_alltt);
    #JKR: the comments include the linebreak and the following whitespace
#    s/([^\\]|^)(%.*\n[ \t]*)+/$1/go; # Remove Comments but not % which may be meaningful
    s/$comment_mark(\d+)\n//go; # Remove comment markers
    # HWS:  Correctly remove multiple %%'s.
    #
    s/\\%/\002/g;
#    s/(%.*\n[ \t]*)//g;
    s/(%[^\n]*\n)[ \t]*/$comment_mark\n/g;
    s/\002/\\%/g;
    s/^$unfinished_mark$keepcomments_rx(\d+)#\n?$verbatim_mark$keepcomments_rx(\d+)#/
        $verbatim{$4}."\n\\end{$1}"/eg; # Raw TeX
    s/$verbatim_mark$keepcomments_rx(\d+)#/
        "\\begin{$1}\n".$verbatim{$2}."\n\\end{$1}"/eg; # Raw TeX
    s/$unfinished_mark$keepcomments_rx(\d+)#/$UNFINISHED_COMMENT="$1::$2";
        "\\begin{$1}\n".$verbatim{$2}/eg; # Raw TeX
    s/%%% TEXEXPAND[^\n]*\n//g;
    $* = 0;    # Multiline matching OFF
    &mark_string;
#    &make_unique;
}

# Turns the given text into an HTML comment tagged with $type, stores it
# under a new verbatim counter and returns the marker that stands for it.
sub make_comment {
    local($type,$_) = @_;
    $_ = &revert_to_raw_tex($_);
    s/^\n+//m;
    # NOTE(review): the argument list between the opening quote and the
    # closing '" )' had been stripped from this copy of the file.  The HTML
    # comment delimiters follow from what was stripped; the exact
    # separators around $type and $_ are an assumption -- confirm.
    $_ = join('', '<!-- ', $type , "\n ", $_ , "\n -->" );
    $global{'verbatim_counter'}++;
    $verbatim{$global{'verbatim_counter'}} = $_;
    &write_mydb('verbatim_counter', $global{'verbatim_counter'}, $_ );
    join('', $verbatim_mark, 'verbatim' , $global{'verbatim_counter'},'#')
}

# Records a warning (and prints a notice) when the string passed by glob
# is longer than 900 characters.
sub warn_if_too_long {
    local(*str,*type) = @_;
    if (length($str) > 900) {
        local($tmp) = &get_first_words($str, 7);
        &write_warnings( "A $type environment is too long and may have disappeared\n" .
            "(causing \"dbm\" errors). Try separating it into smaller pieces.\n" .
            "Potential DBM error:\n$tmp\n\n");
        print "\nPotential DBM error >>>: \n$tmp\n<<<\n";
    }
}

# Wraps the chunks delimited by the "start:end" keys of %other_environments
# in \begin{pre_ENV} ... \end{pre_ENV} (or tex2html_ENV for input-mode
# switches), where ENV is the value stored under the key.  Works on $_.
sub wrap_other_environments {
    local($key, $env, $start, $end, $opt_env, $opt_start);
    foreach $key (keys %other_environments) {
        # skip bogus entries
        next unless ($env = $other_environments{$key});
        $key =~ s/:/($start,$end)=($`,$');':'/e;

        if (($end =~ /^\#$/)&&($start =~ /^\#/)) {
            # catch Indica pre-processor language switches
            $opt_start = $';
            if ($env =~ s/\[(\w*)\]//o) {
                $opt_env = join('','[', ($1 ? $1 : $opt_start ), ']');
            }
            local($next);
            while ($_ =~ /$start\b/) {
                push(@pre_wrapped, $`, "\\begin\{pre_$env\}", $opt_env );
                $_=$';
                if (/(\n+)?$end/m) {
                    push(@pre_wrapped, $`.$1,"\\end\{pre_$env\}$1");
                    $_ = $';
                    if (!(s/^N(IL)?//o)) {$_ = '#'.$_ }
                } else {
                    print "\n *** unclosed $start...$end chunk ***\n";
                    last;
                }
            }
            $* = 0;
            $_ = join('', @pre_wrapped, $_);
            undef @pre_wrapped;
        } elsif (!$end &&($start =~ /^\#/)) {
            # catch Indica pre-processor input-mode switches
            s/$start(.*)\n/\\begin\{tex2html_$env\}$&\\end\{tex2html_$env\}\n/g;
        } elsif (($start eq $end)&&(length($start) == 1)) {
            $start =~ s/(\W)/\\$1/; $end = $start;
            s/([^$end])$start([^$end]+)$end/$1\\begin\{pre_$env\}$2\\end\{pre_$env\}/mg;
        } elsif ($start eq $end) {
            $start =~ s/(\W)/\\$1/g; $end = $start;
            local (@pre_wrapped);
            $* = 1;
            while ($_ =~ /$start/m) {
                push(@pre_wrapped, $`, "\\begin\{pre_$env\}");
                $_=$';
                if (/$end/m) {
                    push(@pre_wrapped, $`, "\\end\{pre_$env\}");
                    $_ = $';
                } else {
                    print "\n *** unclosed $start...$end chunk ***\n";
                    last;
                }
            }
            $* = 0;
            $_ = join('', @pre_wrapped, $_);
            undef @pre_wrapped;
        } elsif (($start)&&($end)) {
            s/$start/\\begin\{pre_$env\}/g;
            s/$end/\\end\{pre_$env\}/g;
        }
    }
    $_;
}

#################### Marking Matching Brackets ######################

# Reads the entire input file and performs pre_processing operations
# on it before returning it as a single string.
# The pre_processing is
# done on separate chunks of the input file by separate Unix processes
# as determined by LaTeX \input commands, in order to reduce the memory
# requirements of LaTeX2HTML.
#
# Every "TEXEXPAND: INCLUDED FILE MARKER" line ends a chunk; each chunk is
# handed to write_string_out, which leaves a "partNNN" file behind.  The part
# files are then read back in sorted order and appended to $_.
sub slurp_input_and_partition_and_pre_process {
    local($file) = @_;
    local(%string, @files, $pos);
    local ($count) = 1;
    open(SINPUT,"<$file");
    local(@file_string);
    print STDERR "$file" if ($VERBOSITY >1);
    # The readline on SINPUT was missing; a bare "while ()" never ends.
    while (<SINPUT>) {
        if (/TEXEXPAND: INCLUDED FILE MARKER (\S*)/) {
            # Forking seems to screw up the rest of the input stream
            # We save the current position ...
            $pos = tell SINPUT;
            print STDERR " fork at offset $pos " if ($VERBOSITY >1);
            $string{'STRING'} = join('',@file_string); @file_string = ();
            &write_string_out($count);
            delete $string{'STRING'};
            # ... so that we can return to it
            seek(SINPUT, $pos, 0);
            print STDERR "\nDoing $1 ";
            ++$count}
        else {
#           $string{'STRING'} .= $_
            push(@file_string,$_);
        }
    }
    $string{'STRING'} = join('',@file_string); @file_string = ();
    &write_string_out($count);
    delete $string{'STRING'};
    close SINPUT;
#    @files = sort file_sort (<$TMP_part*>);
    opendir(DIR, ".${dd}$TMP_");
    @files = sort grep(/^part/, readdir(DIR));
    closedir(DIR);
    die "\nFailed to read in document parts.\n".
        "Look up section Globbing in the troubleshooting manual.\n"
            if $#files < 0;
    $count = 0;
    foreach $file (@files) {
        print STDERR "\nappending file: $file " if ($VERBOSITY > 1);
#       $_ .= `cat $TMP_$file`;
        open CATFILE, "$TMP_$file";
        # The readline on CATFILE was missing here as well.
        local(@catfile) = <CATFILE>;
        $_ = join('', $_, @catfile);
        undef @catfile;
        close CATFILE;
    }
    die "\nFailed to read in document parts (out of memory?).\n"
        unless length($_);
    print STDERR "\ntotal length: ".length($_)." characters\n" if ($VERBOSITY > 1);
}

# Pre-processes $string{'STRING'} (set by the caller) and writes the result
# to "${TMP_}partNNN", where NNN is $count padded to three digits.  The work
# is done in a forked process; the parent waits for it unless $NOFORK is set.
sub write_string_out {
    local($count) = @_;
    if ($count < 10) {$count = '00'.$count}
    elsif ($count < 100) {$count = '0'.$count}
    local($ppid) = "$TMP_";
    local($pid);
    # All open unflushed streams are inherited by the child. If this is
    # not set then the parent will *not* wait
    $| = 1;
    # fork returns 0 to the child and PID to the parent
    &write_mydb_simple("prelatex", $prelatex);
    &close_dbm_database unless ($NOFORK);
    unless ($pid = fork) {
        local($_);
        &open_dbm_database unless ($NOFORK);
        $_ = delete $string{'STRING'};
        # locate blank-lines, for paragraphs.
        # Replace verbatim environments etc.
        &pre_process;
        # locate the blank lines for \par s
        &substitute_pars;
        # Handle newcommand, newenvironment, newcounter ...
        &substitute_meta_cmds;
        &wrap_shorthand_environments;
        print STDERR "\n *** End-of-partition ***" if ($VERBOSITY > 1);
        open(OUTPUT, ">${ppid}part$count");
        print OUTPUT $_;
        close(OUTPUT);
        print STDERR $_ if ($VERBOSITY > 9);
        &write_mydb_simple("preamble", $preamble);
        &write_mydb_simple("prelatex", $prelatex);
        &write_mydb_simple("aux_preamble", $aux_preamble);
        &close_dbm_database unless ($NOFORK);
        exit 0 unless ($NOFORK);
    };
    waitpid($pid,0) unless ($NOFORK);
    &open_dbm_database unless ($NOFORK);
}

# Reads the entire input file into a
# single string.
# The string is assigned to $_ and is also the return value.
sub slurp_input {
    local($file) = @_;
    local(%string);
    open(INPUT,"<$file");
    local(@file_string);
    # The readline on INPUT was missing; a bare "while ()" never ends.
    while (<INPUT>) {
#       $string{'STRING'} .= $_
        push(@file_string, $_ );
    };
    $string{'STRING'} = join('',@file_string);
    close INPUT;
    undef @file_string;
    $_ = delete $string{'STRING'}; # Blow it away and return the result
}

# Returns the %html_specials entry for the argument, or the argument itself
# when there is no (true) entry.  Uses the globals $x and $y as scratch.
sub special {
    ($x) = @_;
    $y= $html_specials{$x};
    ($y ? $y : $x)}

# Same lookup as special, in %html_specials_inv.
sub special_inv {
    ($x) = @_;
    $y= $html_specials_inv{$x};
    ($y ? $y : $x)}

# Mark each matching opening and closing bracket with a unique id.
# Replaces every matching pair of braces in $_ by a pair of identical
# "$O<id>$C" markers, taking new ids from $global{'max_id'}.  Escaped braces
# (\{ and \}) are hidden first and put back at the end.  Braces that find no
# partner are reported on STDOUT; unmatched opening ids are remembered in
# @UNMATCHED_OPENING.
sub mark_string {
    # Modifies $_ in the caller;
    $* = 1;    # Multiline matching ON
    s/^\\{|([^\\])\\{/$1tex2html_escaped_opening_bracket/go;
    s/^\\{|([^\\])\\{/$1tex2html_escaped_opening_bracket/go; # repeat this
    s/^\\}|([^\\])\\}/$1tex2html_escaped_closing_bracket/go;
    s/^\\}|([^\\])\\}/$1tex2html_escaped_closing_bracket/go; # repeat this
    $* = 0;    # Multiline matching OFF
    local($id) = $global{'max_id'};
    local($prev_id) = $id;
    # innermost pairs first, until no brace pair without nested braces is left
    for (;;) {    # Infinite loop
        last unless s/{([^{}]*)}/join("",$O,++$id,$C,$1,$O,$id,$C)/geo;
    }
    local($before,$after,@processedB) = ('','',());
    while (/\{/) {
        local($before) = $`;
        local($after) = $';
        if ((@UNMATCHED_OPENING) && ($before =~ /\}/)){
            while ($before =~ /\}/) {
                local ($this) = pop(@UNMATCHED_OPENING);
                print "\n *** matching brace \#$this found ***\n";
                $before =~ s/\}/join("",$O,$this,$C)/geo;
            }
            $_= join('',$before,"\{",$after);
        }
        s/\{/join("",$O,++$id,$C)/geo;
        $before = $`;
        push(@processedB,$before); $before='';
        $after = $';
        if ($after =~ /\}/) {
            $after =~ s/\}/join("",$O,$id,$C)/geo;
            $_ = join('',$before,$O,$id,$C,$after);
        } else {
            print "\n *** opening brace \#$id is unmatched ***\n";
            $after =~ /^([^\n]+\n)/;
            print " preceding: $& \n\n";
            push (@UNMATCHED_OPENING,$id);
        }
    }
    # Note whether the loop above collected anything before the list is
    # released: the report used to test @processedB after the undef, so it
    # could never be printed.
    local($fallback_used) = scalar(@processedB);
    $_ = join('',@processedB,$_); undef(@processedB);
    # '.' and '-' have equal precedence; without the parentheses the
    # subtraction was applied to the concatenated string.
    print STDERR "bracketings found: ". ($id - $prev_id) ."\n" if ($fallback_used);
    while (/\}/) {
        local($afterclose);
        if (@UNMATCHED_OPENING) {
            local ($this) = pop(@UNMATCHED_OPENING);
            print "\n *** matching brace \#$this found ***\n";
            s/\}/join("",$O,$this,$C)/geo;
        } else {
            print "\n *** there was an unmatched closing \} ";
            local($beforeline,$prevline,$afterline) = ($`, $`.$& , $');
            $* = 1;
            $prevline =~ /\n([^\n]+)\}$/;
            if ($1) {
                print "at the end of:\n" . $1 . "\}\n\n";
            } else {
                $afterline =~ /^([^\n]+)\n/;
                if ($1) {
                    print "at the start of:\n\}" . $1 ."\n\n";
                } else {
                    $prevline =~ /\n([^\n]+)\n\}$/;
                    print "on a line by itself after:\n" . $1 . "\n\}\n\n";
                }
            }
            $* = 0;
            # drop the stray closing brace
            $_ = $beforeline . $afterline;
        }
    }
    $global{'max_id'} = $id;
    s/tex2html_escaped_opening_bracket/\\{/go;
    s/tex2html_escaped_closing_bracket/\\}/go;
}

# Passes each of < > & " in $_ through &special, unless the character is
# preceded by a backslash.
sub replace_html_special_chars {
    # Replaces html special characters with markers unless preceded by "\"
    $* = 1;    # Multiline matching ON
    s/([^\\])(<|>|&|\")/&special($1).&special($2)/geo;
    # MUST DO IT AGAIN JUST IN CASE THERE ARE CONSECUTIVE HTML SPECIALS
    s/([^\\])(<|>|&|\")/&special($1).&special($2)/geo;
    s/^(<|>|&|\")/&special($1)/geo;
    $* = 0;    # Multiline matching OFF
}

# The bibliography and the index should be treated as separate sections
# in their own HTML files. The \bibliography{} command acts as a sectioning command
# that has the desired effect. But when the bibliography is constructed
# manually using the thebibliography environment, or when using the
# theindex environment it is not possible to use the normal sectioning
# mechanism. This subroutine inserts a \bibliography{} or a dummy
# \textohtmlindex command just before the appropriate environments
# to force sectioning.
sub add_bbl_and_idx_dummy_commands {
    local($id) = $global{'max_id'};

    # first pass only counts the thebibliography environments in $bbl_cnt
    s/([\\]begin\s*$O\d+$C\s*thebibliography)/$bbl_cnt++; $1/eg;
    #if ($bbl_cnt == 1) {
        s/([\\]begin\s*$O\d+$C\s*thebibliography)/$id++; "\\bibliography$O$id$C$O$id$C $1"/geo;
    #}
    $global{'max_id'} = $id;
    s/([\\]begin\s*$O\d+$C\s*theindex)/\\textohtmlindex $1/o;
    s/[\\]printindex/\\textohtmlindex /o;
    &lib_add_bbl_and_idx_dummy_commands() if defined(&lib_add_bbl_and_idx_dummy_commands);
}

# Uses and modifies $default_language
# This would be straight-forward except when there are
# \MakeUppercase, \MakeLowercase or \uppercase , \lowercase commands
# present in the source. The cases have to be adjusted before the
# ISO-character code is set; e.g.
# with "z --> "Z in german.perl
#
# Returns the argument with case-changing commands applied first and the
# translation function of the current language applied afterwards.  The
# text is split at the first match of $language_rx and both halves are
# converted recursively, the second one under the new $default_language.
sub convert_iso_latin_chars {
    local($_) = @_;
    local($next_language, $pattern);
    local($xafter, $before, $after, $funct, $level, $delim);
    local(@case_processed);
    while (/$case_change_rx/) {
        $xafter = $2;
#       $before .= $`;
        push(@case_processed, $`);
        $funct = $3;
        $after = '';
        $_ = $';
        if ($xafter =~ /noexpand/) { $before .= "\\$funct"; next; }

        # take the first significant character after the command as delimiter
        s/^[\s%]*(.)/$delim=$1;''/eo;
        if ($delim =~ /{/ ) {
            # brackets not yet numbered...
#           $before .= $funct . $delim;
            push(@case_processed, $funct . $delim);
            $level = 1;
            $after = $delim;
            while (($level)&&($_)&&(/[\{\}]/)) {
                $after .= $` . $&;
                $_ = $';
                if ( "$&" eq "\{" ) {$level++}
                elsif ( "$&" eq "\}" ) { $level-- }
                else { print $_ }
                print "$level";
            }
#           $before .= $after;
            push(@case_processed, $after);
        } elsif ($delim eq "<") {
            # brackets numbered, but maybe not processed...
            s/((<|#)(\d+)(>|#)>).*\1//;
            $after .= $delim . $&;
            $_ = $';
            print STDERR "\n<$2$funct$4>" if ($VERBOSITY > 2);
            $funct =~ s/^\\//o;
            local($cmd) = "do_cmd_$funct";
            $after = &$cmd($after);
#           $before .= $after;
            push(@case_processed, $after);
        } elsif (($xafter)&&($delim eq "\\")) {
            # preceded by \expandafter ...
            # ...so expand the following macro first
            $funct =~ s/^\\//o;
            local($case_change) = $funct;
            s/^(\w+|\W)/$funct=$1;''/eo;
            local($cmd) = $funct;
            local($thiscmd) = "do_cmd_$funct";
            if (defined &$thiscmd) { $_ = &$thiscmd($_) }
            elsif ($new_command{$funct}) {
                local($argn, $body, $opt) = split(/:!:/, $new_command{$funct});
                do { ### local($_) = $body;
                    &make_unique(*body);
                    ### $body = $_
                } if ($body =~ /$O/);
                if ($argn) {
                    do {
                        local($before) = '';
                        local($after) = "\\$funct ".$_;
                        $after = &substitute_newcmd;   # may change $after
                        $after =~ s/\\\@#\@\@/\\/o ;
                    }
                } else { $_ = $body . $_; }
            } else { print "\nUNKNOWN COMMAND: $cmd "; }
            $cmd = $case_change;
            $case_change = "do_cmd_$cmd";
            if (defined &$case_change) { $_ = &$case_change($_) }
        } else {
            # this should not happen, but just in case...
            $funct =~ s/^\\//o;
            local($cmd) = "do_cmd_$funct";
            print STDERR "\n\n<$delim$funct>" if ($VERBOSITY > 2);
            $_ = join('', $delim , $_ );
            if (defined &$cmd) { $_ = &$cmd($_) }
        }
    }
#    $_ = join('', $before, $_) if ($before);
    $_ = join('', @case_processed, $_) if (@case_processed);

    # ...now do the conversions
    ($before, $after, $funct) = ('','','');
    @case_processed = ();
    if (/$language_rx/o) {
        ($next_language, $pattern, $before, $after) = (($1||$2), $&, $`, $');
        $before = &convert_iso_latin_chars($before);
        push(@case_processed, $pattern, $before);
        $default_language = $next_language;
#       $_ = join($pattern, $before, &convert_iso_latin_chars($after));
        $_ = &convert_iso_latin_chars($after);
    } else {
        $funct = $language_translations{$default_language};
        (defined(&$funct) ? $_ = &$funct($_) :
            do {   &write_warnings(
                "\nCould not find translation function for $default_language.\n\n")
            }
        );
        # Was: (defined %unicode_table) && length(%unicode_table) > 2.
        # defined() on a hash is a fatal error in current perls, and the
        # length test only worked because a non-empty hash used to yield a
        # "used/buckets" string in scalar context.  Both tests ask whether
        # the table has entries, which is what a plain truth test does.
        &convert_to_unicode(*_) if (%unicode_table);
    }
    $_ = join('', @case_processed, $_); undef(@case_processed);
    $_;
}

# May need to add something here later
sub english_translation { $_[0] }

# This replaces \setlanguage{\language} with \languageTeX
# This makes the identification of language chunks easier.
sub normalize_language_changes {
    s/$setlanguage_rx/\\$1TeX/go;
}

# General translation mechanism:
#
#
# The main program latex2html calls texexpand with the document name
# in order to expand some of its \input and \include statements, here
# also called 'merging', and to write a list of sensitized style, class,
# input, or include file names.
# When texexpand has finished, all is contained in one file, TMP_foo.
# (assumed foo.tex is the name of the document to translate).
#
# In this version, texexpand cares for following environments
# that may span include files / section boundaries:
# (For a more technical description, see texexpand.)
# a) \begin{comment}
# b) %begin{comment}
# c) \begin{any} introduced with \excludecomment
# d) %begin{any}
# e) \begin{verbatim}
# f) \begin{latexonly}
# g) %begin{latexonly}
#
# a)-d) cause texexpand to drop its contents, it will not show up in the
# output file. You can use this to 'comment out' a bunch of files, say.
#
# e)-g) prevent texexpand from expanding input files, but the environment
# content goes fully into the output file.
#
# Together with each merging of \input etc. there are so-called %%%texexpand
# markers accompanying the boundary.
#
# When latex2html reads in the output file, it uses these markers to write
# each part to a separate file, and processes them further.
#
#
# If you have, for example:
#
# a) preamble
# b) \begin{document}
# c) text
# d) \input{chapter}
# e) more text
# f) \end{document}
#
# you end up in two parts, part 1 is a)-c), part 2 is the rest.
# Regardless of environments spanning input files or sections.
#
#
# What now starts is meta command substitution:
# Therefore, latex2html forks a child process on the first part and waits
# until it has finished, then forks another on the next part and so forth
# (see also &slurp_input_and_partition_and_pre_process).
#
# Here's what each child is doing:
# Each child process reads the new commands translated so far by the previous
# child from the TMP_global DBM database.
# After &pre_processing, it substitutes the meta commands (\newcommand, \def,
# and the like) it finds, and adds the freshly retrieved new commands to the
# list so far.
# This is done *only on its part* of the document; this saves upwards of memory.
# Finally, it writes its list of new commands (synopsis and bodies) to the
# DBM database, and exits.
# After the last child finished, latex2html reads in all parts and
# concatenates them.
#
#
# So, at this point in time (start of &translate), it again has the complete
# document, but now preprocessed and with new commands substituted.
# This has several disadvantages: an amount of commands is substituted (in # TeX lingo, expanded) earlier than the rest. # This causes trouble if commands really must get expanded at the point # in time they show up. # # # Then, still in &translate, latex2html uses the list of section commands to # split the complete document into chunks. # The chunks are not written to files or so, they are retained in the @sections # list, but each chunk is handled separately. # latex2html puts the current chunk to $_ and processes it with # &translate_environments etc., then fetches the next chunk, and so on. # This prevents environments that span section boundaries from getting # translated, because \begin and \end cannot find one another, to say it this # way. # # # After the chunk is translated to HTML, it is written to a file. # When all chunks are done, latex2html rereads each file to get cross # references right, replace image markers with the image file names, and # writes index and bibliography. # # sub translate { &normalize_sections; # Deal with the *-form of sectioning commands # Split the input into sections, keeping the preamble together # Due to the regular expression, each split will create 5 more entries. # Entry 1 and 2: non-letter/letter sectioning command, # entry 4: the delimiter (may be empty) # entry 5: the text. local($pre_section, @sections); if (/\\(startdocument|begin\s*($O\d+$C)\s*document\s*\2)/) { $pre_section = $`.$&; $_ = $'; } @sections = split(/$sections_rx/, $_); $sections[0] = $pre_section.$sections[0] if ($pre_section); undef $pre_section; local($sections) = int(scalar(@sections) / 5); # Initialises $curr_sec_id to a list of 0's equal to # the number of sectioning commands. local(@curr_sec_id) = split(' ', &make_first_key); local(@segment_sec_id) = @curr_sec_id; local($i, $j, $current_depth) = (0,0,0); local($curr_sec) = $FILE; local($top_sec) = ($SEGMENT ? 
'' : 'top of '); local(%section_info, %toc_section_info, $CURRENT_FILE, %cite_info, %ref_files); # These filenames may be set when translating the corresponding commands. local($tocfile, $loffile, $lotfile, $footfile, $citefile, $idxfile, $figure_captions, $table_captions, $footnotes, $citations, %font_size, %index, %done, $t_title, $t_author, $t_date, $t_address, $t_affil, $changed); local(%index_labels, %index_segment, $preindex, %footnotes, %citefiles); local($segment_table_captions, $segment_figure_captions); local($dir,$nosave) = ('',''); local($del,$close_all,$open_all); local(@open_tags) = (); local(@save_open_tags)=(); &process_aux_file if $SHOW_SECTION_NUMBERS || /\\(caption|(html|hyper)?((eq)?ref|cite))/; require ("${PREFIX}internals.pl") if (-f "${PREFIX}internals.pl"); #JCL(jcl-del) &make_single_cmd_rx; # $tocfile = $EXTERNAL_CONTENTS; $idxfile = $EXTERNAL_INDEX; $citefile = $EXTERNAL_BIBLIO; $citefiles{1} = $citefile if ($citefile); print "\nTranslating ..."; while ($i <= @sections) { undef $_; $_ = $sections[$i]; s/^[\s]*//; # Remove initial blank lines # The section command was removed when splitting ... s/^/\\$curr_sec$del/ if ($i > 0); # ... so put it back if ($current_depth < $MAX_SPLIT_DEPTH) { if (($footnotes)&&($NO_FOOTNODE)&&( $current_depth < $MAX_SPLIT_DEPTH)) { local($thesenotes) = &make_footnotes ; print OUTPUT $thesenotes; } $CURRENT_FILE = &make_name($curr_sec, join('_',@curr_sec_id)); open(OUTPUT, ">$CURRENT_FILE") || die "Cannot open $DESTDIR/$FILE $!"; }; &remove_document_env; # &wrap_shorthand_environments; #RRM Is this needed ? print STDERR "\n" if ($VERBOSITY); print STDERR "\n" if ($VERBOSITY > 2); print $i/5; print "/$sections:$top_sec$curr_sec:" if ($VERBOSITY); # Must do this early ... It also sets $TITLE &process_command($sections_rx, *_) if (/^$sections_rx/); # reset tags saved from the previous section @open_tags = @save_open_tags; @save_open_tags = (); if ((! 
$TITLE) || ($TITLE eq $default_title)) { eval '$TITLE = '.$default_title; $TITLE = $default_title if $@; local($curr_sec_tex) = ($top_sec ? '' : join('', '"', &revert_to_raw_tex($curr_sec), '"')); print STDERR "$curr_sec_tex for $CURRENT_FILE\n" if ($VERBOSITY); } else { local($tmp) = &purify($TITLE,1); $tmp = &revert_to_raw_tex($tmp); print STDERR "\"$tmp\" for $CURRENT_FILE\n" if ($VERBOSITY); } if (/\\(latextohtmlditchpreceding|startdocument)/m) { local($after) = $'; local($before) = $`.$&; $SEGMENT = 1 if ($1 =~ /startdocument/); print STDERR "\n *** translating preamble ***\n" if ($VERBOSITY); $_ = &translate_preamble($before); s/\n\n//g; s/
//g; # remove redundant blank lines and breaks # # &process_aux_file if $AUX_FILE_NEEDED; # print STDERR "\n *** preamble done ***\n" if ($VERBOSITY); $PREAMBLE = 0; $NESTING_LEVEL=0; &do_AtBeginDocument; $* = 1; $after =~ s/^\s*//; $* = 0; print STDERR (($VERBOSITY >2)? "\n*** Translating environments ***" : ";"); $after = &translate_environments($after); print STDERR (($VERBOSITY >2)? "\n*** Translating commands ***" : ";"); $_ .= &translate_commands($after); # $_ = &translate_commands($after); } else { &do_AtBeginDocument; $PREAMBLE = 0; $NESTING_LEVEL=0; print STDERR (($VERBOSITY >2)? "\n*** Translating environments ***" : ";"); $_ = &translate_environments($_); print STDERR (($VERBOSITY >2)? "\n*** Translating commands ***" : ";"); $_ = &translate_commands($_); } # close any tags that remain open if (@open_tags) { ($close_all,$open_all) = &preserve_open_tags(); $_ .= $close_all; @save_open_tags = @open_tags; @open_tags = (); } else { ($close_all,$open_all) = ('','') } print STDERR (($VERBOSITY >2)? "\n*** Translations done ***" : "\n"); # if (($footnotes)&&($NO_FOOTNODE)&&( $current_depth < $MAX_SPLIT_DEPTH)) { # $_ .= &make_footnotes # } print OUTPUT $_; # Associate each id with the depth, the filename and the title ###MEH -- starred sections don't show up in TOC ... # RRM: ...unless $TOC_STARS is set if ($TOC_STARS) { # STScI CHANGE. Add TOCAUTHORLIST to toc info, but only if # current_depth is the chapter level. 
# # $toc_section_info{join(' ',@curr_sec_id)} = # "$current_depth$delim$CURRENT_FILE$delim$TITLE" # if ($current_depth <= $MAX_SPLIT_DEPTH + $MAX_LINK_DEPTH); # if( $current_depth == $section_commands{'chapter'} ) { $toc_section_info{join(' ',@curr_sec_id)} = "$current_depth$delim$CURRENT_FILE$delim$TITLE$delim$TOCAUTHORLIST" if ($current_depth <= $MAX_SPLIT_DEPTH + $MAX_LINK_DEPTH); # } else { # $toc_section_info{join(' ',@curr_sec_id)} = # "$current_depth$delim$CURRENT_FILE$delim$TITLE" # if ($current_depth <= $MAX_SPLIT_DEPTH + $MAX_LINK_DEPTH); # } } else { # STScI CHANGE. Add TOCAUTHORLIST to toc info, but only if # current_depth is the chapter level. # # $toc_section_info{join(' ',@curr_sec_id)} = # "$current_depth$delim$CURRENT_FILE$delim$TITLE" # . ($curr_sec =~ /star$/ ? "$delim" : "") # if ($current_depth <= $MAX_SPLIT_DEPTH + $MAX_LINK_DEPTH); # if( $current_depth == $section_commands{'chapter'} ) { $toc_section_info{join(' ',@curr_sec_id)} = "$current_depth$delim$CURRENT_FILE$delim$TITLE$delim$TOCAUTHORLIST" . ($curr_sec =~ /star$/ ? "$delim" : "") if ($current_depth <= $MAX_SPLIT_DEPTH + $MAX_LINK_DEPTH); # } else { # $toc_section_info{join(' ',@curr_sec_id)} = # "$current_depth$delim$CURRENT_FILE$delim$TITLE" # . ($curr_sec =~ /star$/ ? "$delim" : "") # if ($current_depth <= $MAX_SPLIT_DEPTH + $MAX_LINK_DEPTH); # } } # include $BODYTEXT in the section_info, when starting a new page $section_info{join(' ',@curr_sec_id)} = "$current_depth$delim$CURRENT_FILE$delim$TITLE$delim" . (($current_depth < $MAX_SPLIT_DEPTH)? $BODYTEXT: ""); # Get type of section (see also the split above) $curr_sec = $sections[$i+1].$sections[$i+2]; $del = $sections[$i+4]; # Get the depth of the current section; $current_depth = $section_commands{$curr_sec}; for($j=0; $j <= $#curr_sec_id; $j++) { $curr_sec_id[$j] += $segment_sec_id[$j]; $segment_sec_id[$j] = 0; } #RRM : Should this be done here, or in \stepcounter ? 
@curr_sec_id = &new_level($current_depth, @curr_sec_id); $TITLE = ''; $top_sec = ''; $i+=5; #skip to next text section } @open_tags = (); $open_all = ''; $_ = undef; $_ = &make_footnotes if ($footnotes); print OUTPUT; close OUTPUT; if ($PREPROCESS_IMAGES) { &preprocess_images } else { &make_image_file } print STDERR "\n *** making images ***" if ($VERBOSITY > 1); &make_images; # Link sections, add head/body/address do cross-refs etc print STDERR "\n *** post-process ***" if ($VERBOSITY > 1); &post_process; print STDERR "\n *** post-processed ***" if ($VERBOSITY > 1); ©_icons if $LOCAL_ICONS; if ($SEGMENT || $DEBUG || $SEGMENTED) { &save_captions_in_file("figure", $figure_captions) if $figure_captions; &save_captions_in_file("table", $table_captions) if $table_captions; # &save_array_in_file ("captions", "figure_captions", %figure_captions) if %figure_captions; # &save_array_in_file ("captions", "table_captions", %table_captions) if %table_captions; &save_array_in_file ("index", "index", %index); &save_array_in_file ("sections", "section_info", %section_info); &save_array_in_file ("contents", "toc_section_info", %toc_section_info); &save_array_in_file ("index", "sub_index", %sub_index) if %sub_index; &save_array_in_file ("index", "index_labels", %index_labels) if %index_labels; &save_array_in_file ("index", "index_segment", %index_segment) if %index_segment; &save_array_in_file ("index", "printable_key", %printable_key) if (%printable_key || %index_segment); } # &save_array_in_file ("sections", "section_info", %section_info); # &save_array_in_file ("contents", "toc_section_info", %toc_section_info); &save_array_in_file ("internals", "ref_files", %ref_files) if $changed; &save_array_in_file ("labels", "external_labels", %ref_files); &save_array_in_file ("images", "cached_env_img", %cached_env_img); }

# RRM:
# Translate the preamble text passed as the single argument.
#
# Sets $PREAMBLE to 1 and $NESTING_LEVEL to 0 before translating; neither
# is reset by this sub.
#
# When the text contains \htmlhead, the part before it is translated and
# the result thrown away, while \htmlhead and everything after it is
# translated and kept.  Without \htmlhead the whole text is translated
# and the result is thrown away.
#
# Whatever is left is passed through &do_AtBeginDocument, and is then
# cleared unless $SEGMENT is set.  The resulting string is returned.
sub translate_preamble {
    local($_) = @_;
    $PREAMBLE = 1;
    $NESTING_LEVEL=0; #counter for TeX group nesting level

    # remove some artificially inserted constructions
    # ($* = 1 switches on multi-line matching for these substitutions only)
    $* = 1;
    s/${tex2html_deferred_rx}\\par${tex2html_deferred_rx2}//g;
    s/\\newedcommand(<<\d+>>)([A-Za-z]+|[^A-Za-z])\1(\[\d+\])?(\[[^]]*\])?(<<\d+>>)[\w\W\n]*\5($comment_mark)?//g;
    s/\n{2,}/\n/og;
    $* = 0;

    if (/\\htmlhead/) {
        print STDERR "\nPREAMBLE: discarding...\n$`" if ($VERBOSITY > 4);
        # $`, $& and $' below all still refer to the \htmlhead match above,
        # so no other pattern match may be inserted before they are read.
        local($after) = $&.$';
        # translate segment preamble preceding \htmlhead
        &translate_commands(&translate_environments($`));
        # translate \htmlhead and rest of preamble
        $_=&translate_commands(&translate_environments($after));
        print STDERR "\nPREAMBLE: retaining...\n$_" if ($VERBOSITY > 4);
    } else {
        # translate only preamble here (metacommands etc.)
        # there should be no textual results, if so, discard them
        &translate_commands(&translate_environments($_));
        print STDERR "\nPREAMBLE: discarding...\n$_" if ($VERBOSITY > 4);
        $_="";
    };
    $_ = &do_AtBeginDocument($_);
    if (! $SEGMENT) { $_ = ''} # segmented documents have a heading already

    print STDERR "\nPREAMBLE: $_" if (($_)&&($VERBOSITY > 1));
    $_;
}

############################ Processing Environments ##########################

sub wrap_shorthand_environments { # This wraps a dummy environment around environments that do not use # the begin-end convention. The wrapper will force them to be # evaluated by Latex rather than them being translated. # Wrap a dummy environment around matching TMPs. # s/^\$\$|([^\\])\$\$/{$1.&next_wrapper('tex2html_double_dollar')}/ge; # Wrap a dummy environment around matching $s. 
# s/^\$|([^\\])\$/{$1.&next_wrapper('$')}/ge; # s/tex2html_double_dollar/\$\$/go; # Do \(s and \[s # local($wrapper) = "tex2html_wrap_inline"; # \ensuremath wrapper print STDERR "\n *** wrapping environments ***\n" if ($VERBOSITY > 3); $* = 1; # Multiline matching ON print STDERR "\\(" if ($VERBOSITY > 3); s/(^\\[(])|([^\\])(\\[(])/{$2.&make_any_wrapper(1,'',$wrapper).$1.$3}/geo; print STDERR "\\)" if ($VERBOSITY > 3); s/(^\\[)]|[^\\]\\[)])/{$1.&make_any_wrapper(0,'',$wrapper)}/geo; print STDERR "\\[" if ($VERBOSITY > 3); s/(^\\[[])|([^\\])(\\[[])/{$2.&make_any_wrapper(1,1,"displaymath")}/geo; print STDERR "\\]" if ($VERBOSITY > 3); s/(^\\[\]])|([^\\])(\\[\]])/{$2.&make_any_wrapper(0,1,"displaymath")}/geo; print STDERR "\$" if ($VERBOSITY > 3); s/$enspair/print "\$"; {&make_any_wrapper(1,'',$wrapper).$&.&make_any_wrapper(0,'',$wrapper)}/geo; $* = 0; # Multiline matching OFF $double_dol_rx = '(^|[^\\\\])\\$\\$'; $single_dol_rx = '(^|[^\\\\])\\$'; print STDERR "\$" if ($VERBOSITY > 3); local($dollars_remain) = 0; $_ = &wrap_math_environment; $_ = &wrap_raw_arg_cmds; }

# Wrap $...$ and $$...$$ math found in $_ and return the rewritten text.
#
# Takes no arguments: the text is read from (and consumed out of) $_, and
# the value returned is the processed text followed by what is left in $_.
# Relies on $single_dol_rx and $double_dol_rx for locating the dollar
# signs, and writes the flag $dollars_remain, which this sub does not
# declare itself.
#
# Inline math is wrapped as "tex2html_wrap_inline" and keeps its "$"
# delimiters; math opened by "$$" is wrapped as "displaymath" with the
# dollars dropped.  The wrappers come from &make_any_wrapper.
#
# Prints a diagnostic and dies when an opening dollar has no acceptable
# closing partner.
sub wrap_math_environment {
    # This wraps math-type environments
    # The trick here is that the opening brace is the same as the close,
    # but they *can* still nest, in cases like this:
    #
    # $ outer stuff ... \hbox{ ... $ inner stuff $ ... } ... $
    #
    # Note that the inner pair of $'s is nested within a group. So, to
    # handle these cases correctly, we need to make sure that the outer
    # brace-level is the same as the inner. --- rst
    #tex2html_wrap
    # And yet another problem: there is a scungy local idiom to do
    # this: $\_$ for a boldfaced underscore. xmosaic can't display the
    # resulting itty-bitty bitmap, for some reason; even if it could, it
    # would probably come out as an overbar because of the floating-
    # baseline problem. So, we have to special case this. --- rst again.

    local ($processed_text, @processed_text, $before, $end_rx, $delim, $ifclosed);
    local ($underscore_match_rx) = "^\\s*\\\\\\_\\s*\\\$";
    local ($wrapper);
    print STDERR "\nwrap math:" if ($VERBOSITY > 3);

    #find braced dollars, in tabular-specs
    # (a "$" standing between two copies of the same bracket marker is
    #  replaced by $dol_mark so the loop below does not see it as math)
    while (/((($O|$OP)\d+($C|$CP))\s*)\$(\s*\2)/) {
        push (@processed_text, $`, $1.$dol_mark.$5);
        $_ = $';
    }
    $_ = join('',@processed_text, $_) if (@processed_text);
    undef @processed_text;

    $dollars_remain = 0;
    while (/$single_dol_rx/) {
        # keep the text before the dollar, plus the character captured
        # in front of it
        $processed_text .= $`.$1;
        $_ = $';
        $wrapper = "tex2html_wrap_inline";
        $end_rx = $single_dol_rx; # Default, unless we begin with $$.
        $delim = "\$";

        if (/^\$/ && (! $`)) {
            s/^\$//;
            $end_rx = $double_dol_rx;
            $delim = ""; # Cannot say "\$\$" inside displaymath
            $wrapper = "displaymath";
        } elsif (/$underscore_match_rx/ && (! $`)) {
            # Special case for $\_$ ...
            s/$underscore_match_rx//;
            $processed_text .= '\\_';
            next;
        }

        # Have an opening $ or $$. Find matching close, at same bracket level
        # $processed_text .= &make_any_wrapper(1,'',$wrapper).$delim;

        print STDERR "\$" if ($VERBOSITY > 3);
        $ifclosed = 0;
        local($thismath);
        while (/$end_rx/) {
            # Forget the $$ if we are going to replace it with "displaymath"
            $before = $` . (($wrapper eq "displaymath")? "$1" : $&);
            # give up on this candidate if a sectioning or \item command
            # lies between the opening and the closing dollar
            last if ($before =~ /\\(sub)*(item|section|chapter|part|paragraph)(star)?\b/);
            $thismath .= $before;
            $_ = $';
            s/^( [^\n])/\\space$1/; #make sure a trailing space doesn't get lost.

            # Found dollar sign inside open subgroup ... now see if it's
            # at the same brace-level ...

            local ($losing, $br_rx) = (0, '');
            print STDERR "\$" if ($VERBOSITY > 3);
            while ($before =~ /$begin_cmd_rx/) {
                $br_rx = &make_end_cmd_rx($1);
                $before = $';
                if ($before =~ /$br_rx/) { $before = $'; }
                else { $losing = 1; last; }
            }
            # every opened group was also closed: this dollar is the partner
            do { $ifclosed = 1; last } unless $losing;

            # It wasn't ... find the matching close brace farther on; then
            # keep going.

            /$br_rx/;
            $thismath .= $`.$&;

            #RRM: may now contain unprocessed $s e.g. $\mbox{...$...$...}$
            # the &do_cmd_mbox uses this specially to force an image
            # ...but there may be other situations; e.g. \hbox
            # so set a flag:
            $dollars_remain = 1;
            $_ = $';
        }

        # Got to the end. Whew!
        if ($ifclosed) {
            # also process any nested math
            # (the recursive call clears $dollars_remain on entry, and sets
            #  it again only if it meets further nested dollars)
            while (($dollars_remain)&&($delim eq "\$")) {
                local($saved) = $_;
                $thismath =~ s/\$$//;
                $_ = $thismath;
                $thismath = &wrap_math_environment;
                $thismath .= "\$";
                $_ = $saved;
            }
            $processed_text .= &make_any_wrapper(1,'',$wrapper) . $delim
                . $thismath . &make_any_wrapper(0,'',$wrapper);
        } else {
            print STDERR "\n\n *** Error: unclosed math or extra `\$', before:\n$thismath\n\n";
            # NOTE(review): the extent of the disabled recovery code below is
            # a reading of ambiguous layout; `$thismath = "";` is taken to
            # belong to it -- confirm against the upstream file.
            # # remove a $ to try to recover as much as possible.
            # $thismath =~ s/([^\\]\\\\|[^\\])\$/$1\%\%/;
            # $_ = $thismath . $_; $thismath = "";
            print "\n$thismath\n\n\n$_\n\n\n";
            die;
        }
    }
    $processed_text . $_;
}

sub translate_environments { local ($_) = @_; local($tmp, $capenv); # print "\nTranslating environments ..."; local($after, @processedE); local ($contents, $before, $br_id, $env, $pattern); for (;;) { # last unless (/$begin_env_rx/o); last unless (/$begin_env_rx|$begin_cmd_rx/o); # local ($contents, $before, $br_id, $env, $pattern); local($this_env, $opt_arg, $style_info); $contents = ''; # $1,$2 : optional argument/text --- stylesheet info # $3 : br_id (at the beginning of an environment name) # $4 : environment name # $5 : br_id of open-brace, when $3 == $4 == ''; if ($3) { ($before, $opt_arg, $style_info, $br_id , $env, $after, $pattern) = ($`, $1, $2, $3, $4, $', $&); if (($before)&& (!($before =~ /$begin_env_rx|$begin_cmd_rx/))) { push(@processedE,$before); $_ = $pattern . $after; $before = ''; } } else { ($before, $br_id, $env, $after, $pattern) = ($`, $5, 'group', $', $&); if (($before)&& (!($before =~ /$begin_env_rx|$begin_cmd_rx/))) { push(@processedE,$before); $_ = $pattern . $after; $before = ''; } local($end_cmd_rx) = &make_end_cmd_rx($br_id); if ($after =~ /$end_cmd_rx/) { # ... 
find the the matching closing one $NESTING_LEVEL++; ($contents, $after) = ($`, $'); $contents = &process_group_env($contents); print STDERR "\nOUT: {$br_id} ".length($contents) if ($VERBOSITY > 3); print STDERR "\n:$contents\n" if ($VERBOSITY > 7); # THIS MARKS THE OPEN-CLOSE DELIMITERS AS PROCESSED $_ = join("", $before,"$OP$br_id$CP", $contents,"$OP$br_id$CP", $after); $NESTING_LEVEL--; } else { $pattern = &escape_rx_chars($pattern); s/$pattern//; print "\nCannot find matching bracket for $br_id"; $_ = join("", $before,"$OP$br_id$CP", $after); } next; } $contents = undef; local($defenv) = $env =~ /deferred/; # $capenv = $env =~ /.*(figure|table)/ ? $1 : ""; local($closures,$reopens); local(@save_open_tags) = @open_tags unless ($defenv); local(@open_tags) = @save_open_tags unless ($defenv); ($closures, $reopens) = &preserve_open_tags() if ($env =~ /tabular|longtable/); $closures = &close_all_tags() if ( !($defenv)&&(&defined_env($env)) && (!$declarations{$env}) ); # Sets $contents and modifies $after if (&find_end_env($env,*contents,*after)) { print STDERR "\nIN {$env $br_id}\n$contents\n" if ($VERBOSITY > 4); &process_command($counters_rx, *before) if ($before =~ /$counters_rx/); # This may modify $before and $after # &extract_captions($capenv) if $capenv; # Modifies $contents #RRM: the do_env_... subroutines handle when to translate sub-environments # $contents = &translate_environments($contents) if ## ((!$defenv) && (&defined_env($env)) && (! $raw_arg_cmds{$env}) ## && (!$declarations{$env}) # ((&defined_env($env)) && (! $raw_arg_cmds{$env}) # && (!($env =~ /latexonly|enumerate|figure|table|makeimage|wrap_inline/)) # && ((! 
$NO_SIMPLE_MATH)||(!($env =~ /wrap/))) # && (!($env =~ /(math|wrap|equation|eqnarray|makeimage|minipage|tabular)/) ) # ); if ($opt_arg) { &process_environment(1, $env, $br_id, $style_info); # alters $contents } else { &process_environment(0, $env, $br_id, '')} undef $_; print STDERR "\nOUT {$env $br_id}\n$contents\n" if ($VERBOSITY > 4); # if ($capenv && $captions) { # $after = "
\n$captions
\n$after"; # $captions = ""; # } #JCL(jcl-env) - insert the $O$br_id$C stuff to handle environment grouping if (!($contents eq '')) { $after =~ s/^\n//o if ($defenv); $this_env = join("", $before, $closures # $this_env = join("", $before, $O,$br_id,$C , $closures # $_ = join("", $O,$br_id,$C , $closures # push(@processedE, $O,$br_id,$C , $closures , $contents , ($defenv ? '': &balance_tags()) ); $_ = $after; # , $O,$br_id,$C); $_ = $after; # , $O,$br_id,$C, $after); } else { $this_env = join("", $before , $closures # $_ = join("", $closures # push(@processedE , $closures , ($defenv ? '': &balance_tags()) ); $_ = $after; # , $after); }; ### Evan Welsh added the next 24 lines ## } elsif (&defined_env($env)) { print STDERR "\nIN {$env $br_id}\n$contents\n" if ($VERBOSITY > 4); # If I specify a function for the environment then it # calls it with the contents truncated at the next section. # It assumes I know what I'm doing and doesn't give a # deferred warning. # &extract_captions($capenv) if $capenv; $contents = $after; if ($opt_arg) { $contents = &process_environment(1, $env, $br_id, $style_info); } else { $contents = &process_environment(0, $env, $br_id, ''); } print STDERR "\nOUT {$env $br_id}\n$contents\n" if ($VERBOSITY > 4); $this_env = join("", $before, $closures ,$contents); $_ = $after; # $_ = join("", $closures ,$contents); # push(@processedE, $closures ,$contents); $_ = $after; # if ($capenv && $captions) { # $_ .= "
\n$captions
\n"; # $captions = "";} } elsif ($ignore{$env}) { print STDERR "\nIGNORED {$env $br_id}\n$contents\n" if ($VERBOSITY > 4); # If I specify that the environment should be ignored then # it is but I get a deferred warning. $this_env = join("", $before , $closures , &balance_tags() # $_ = join("", $closures , &balance_tags() # push(@processedE , $closures , &balance_tags() , $contents); $_ = $after; &write_warnings("\n\\end{$env} not found (ignored).\n"); } elsif ($raw_arg_cmds{$env}) { print "\nIN {$env $br_id}\n$contents\n" if ($VERBOSITY > 4); # If I specify that the environment should be passed to tex # then it is with the environment truncated at the next # section and I get a deferred warning. # &extract_captions($capenv) if $capenv; $contents = $after; if ($opt_arg) { $contents = &process_environment(1, $env, $br_id, $style_info); } else { $contents = &process_environment(0, $env, $br_id, ''); } print STDERR "\nOUT {$env $br_id}\n$contents\n" if ($VERBOSITY > 4); $this_env = join("", $before, $closures, $contents, &balance_tags()); $_=''; &write_warnings("\n\\end{$env $br_id} not found (truncated at next section boundary).\n"); } else { $pattern = &escape_rx_chars($pattern); s/$pattern/$closures/; print "\nCannot find \\end{$env $br_id}\n"; $_ .= &balance_tags() unless ($defenv); } if ($this_env =~ /$begin_env_rx|$begin_cmd_rx/) { $_ = $this_env . $_ } else { push (@processedE, $this_env) } } $_ = join('',@processedE) . 
$_; $tmp = $_; undef $_; &process_command($counters_rx, *tmp) if ($tmp =~ /$counters_rx/); $_ = $tmp; undef $tmp; $_ }

# Find the end marker that closes environment $env.
#
#   $env           environment name, handed to &make_begin_end_env_rx
#   *ref_contents  glob aliasing the caller's scalar that collects the body
#   *rest          glob aliasing the caller's scalar with the text to scan
#
# Each match of the begin/end pattern whose first capture equals "begin"
# raises the nesting depth; any other match lowers it.  The depth starts
# at 1, and the scan stops as soon as it reaches 0.
#
# Returns 1 when the closing marker was found: the body is then appended
# to $ref_contents and $rest holds the text after the marker (and after
# one optional newline).  Returns 0 otherwise: the scanned text is put back
# in front of $rest and $ref_contents is emptied.
sub find_end_env {
    local ($env, *ref_contents, *rest) = @_;
    local ($be_rx) = &make_begin_end_env_rx ($env);
    local ($count) = 1;

    for (;;) {
        last unless ($rest =~ /$be_rx\n?/s);

        # text in front of the marker always belongs to the body
        $ref_contents .= $`;
        $count += ($1 eq "begin") ? 1 : -1;
        $rest = $';
        last if ($count == 0);

        # a nested marker: it is part of the body too
        $ref_contents .= $&;
    }

    return(1) if ($count == 0);

    # no balancing end: hand everything back to the caller untouched
    $rest = $ref_contents . $rest;
    $ref_contents = "";
    return(0);
}

sub process_group_env { local($contents) = @_; local(@save_open_tags) = @open_tags; local(@open_tags) = @save_open_tags; print STDERR "\nIN::{group $br_id}" if ($VERBOSITY > 4); print STDERR "\n:$contents\n" if ($VERBOSITY > 6); # need to catch explicit local font-changes local(%font_size) = %font_size if (/\\font\b/); # record class/id info for a style-sheet entry local($env_id, $tmp, $etmp); if (($USING_STYLES) && !$PREAMBLE ) { $env_id = $br_id; } # $env_id = "grp$br_id"; print "\nENV: grp : $env_id"; # $styleID{$env_id} = " "; # $env_id = " ID=\"$env_id\""; # } undef $_; if ($contents =~ /^\s*\\($image_switch_rx)\b/s) { # catch TeX-like environments: {\fontcmd ... 
} local($switch_cmd) = "do_cmd_$1"; if (defined &$switch_cmd ) { eval "\$contents = \&${switch_cmd}(\$')"; print "\n*** &$switch_cmd didn't work: $@\n$contents\n\n" if ($@); } else { $contents = &process_undefined_environment("tex2html_accent_inline" , ++$global{'max_id'},"\{".$contents."\}"); # $contents = &process_in_latex("\{".$contents."\}"); } } elsif (($env_switch_rx)&&($contents =~ s/^(\s*)\\($env_switch_rx)\b//s)) { # write directly into images.tex, protected by \begingroup...\endgroup local($prespace, $cmd, $tmp) = ($1,$2,"do_cmd_$2"); $latex_body .= "\n\\begingroup "; if (defined &$tmp) { eval("\$contents = &do_cmd_$cmd(\$contents)"); } $contents = &translate_environments($contents); $contents = &translate_commands($contents); undef $tmp; undef $cmd; $contents .= "\n\\endgroup "; } elsif ($contents =~ /^\s*\\([a-zA-Z]+)\b/) { local($after_cmd) = $'; local($cmd) = $1; $tmp = "do_cmd_$cmd"; $etmp = "do_env_$cmd"; if (($cmd =~/^(rm(family)?|normalsize)$/)||($declarations{$cmd}&&(defined &$tmp))) { do{ local(@save_open_tags) = @open_tags; eval "\$contents = \&$tmp(\$after_cmd);"; print "\n*** eval &$tmp failed: $@\n$contents\n\n" if ($@); $contents .= &balance_tags(); }; } elsif ($declarations{$cmd}&&(defined &$etmp)) { eval "\$contents = \&$etmp(\$after_cmd);"; } else { $contents = &translate_environments($contents); $contents = &translate_commands($contents) if ($contents =~ /$match_br_rx/o); # Modifies $contents &process_command($single_cmd_rx,*contents) if ($contents =~ /\\/o); } undef ($cmd,$tmp,$etmp); } else { $contents = &translate_environments($contents); $contents = &translate_commands($contents) if ($contents =~ /$match_br_rx/o); # Modifies $contents &process_command($single_cmd_rx,*contents) if ($contents =~ /\\/o); } $contents . 
&balance_tags(); }

# MODIFIES $contents
#
# Translate one environment whose body is held in $contents.
#
#   $opt     true when the \begin carried an optional argument
#   $env     environment name as written (may end in "*")
#   $id      bracket id of the environment
#   $styles  style-sheet information from the optional argument
#
# $contents is not a parameter: it is the variable of that name visible
# from the calling sub, and it is both read and overwritten here.  The
# final value is also returned.
#
# Order of handling:
#   1. names matching tex2html_deferred go to &do_env_tex2html_deferred;
#   2. environments accepted by &defined_env are passed to the sub named
#      "do_env_" followed by $env (with a trailing "*" spelled "star");
#   3. names matching tex2html_nowrap go to &process_undefined_environment;
#   4. everything else is tried with &special_env.
# When steps 2 or 4 leave a non-empty $contents unchanged, the environment
# is dropped if listed in %ignore, and otherwise handed to
# &process_undefined_environment, after which an optional
# "post_latex_do_env_..." sub, a border table or a plain wrapper is applied.
sub process_environment {
    local($opt, $env, $id, $styles) = @_;
    local($envS) = $env;
    $envS =~ s/\*\s*$/star/;
    local($env_sub,$border,$attribs,$env_id) = ("do_env_$envS",'','','');
    local($original) = $contents;

    if ($env =~ /tex2html_deferred/ ) {
        $contents = &do_env_tex2html_deferred($contents);
        return ($contents);
    }
    $env_id = &read_style_info($opt, $env, $id, $styles)
        if (($USING_STYLES)&&($opt));

    # # record class/id info for a style-sheet entry
    # if ($USING_STYLES) {
    #     local($style_names,$style_extra)=(''," ");
    #     if ($opt) {
    #print "\n$envS : $br_id : $styles ";
    #         if ($styles =~ /^\s*([^;]*)\;\s*(.*)$/) {
    #             $style_names = $1; $style_extra = $2;
    #         } elsif ($styles) {
    #             $style_names = $styles;
    #         }
    #         $envS = "$style_names" if ($style_names);
    #     }
    #     $envS =~ s/tex2html_(\w+_)?//;
    #     $env_style{$envS} = " " unless (($style_names)||($env_style{$envS}));
    #     $env_id = "env$id";
    #     $styleID{$env_id} = $style_extra unless ($PREAMBLE);
    #     $env_id = " ID=\"$env_id\" CLASS=\"$envS\"";
    # }

    if (&defined_env($env)) {
        print STDERR ",";
        print STDERR "{$env $id}" if ($VERBOSITY > 1);
        # $env_sub =~ s/\*$/star/;
        $contents = &$env_sub($contents);
    } elsif ($env =~ /tex2html_nowrap/) {
        #pass it on directly for LaTeX, via images.tex
        $contents = &process_undefined_environment($env, $id, $contents);
        return ($contents);
    # elsif (&special_env) { # &special_env modifies $contents
    } else {
        # these three are declared here so that &special_env can set them
        local($no_special_chars) = 0;
        local($failed) = 0;
        local($has_special_chars) = 0;
        &special_env; # modifies $contents
        print STDERR "\n" if ($VERBOSITY > 3);
        if ($failed || $has_special_chars) {
            # restoring the original text makes the test below treat the
            # environment as untranslated
            $contents = $original;
            $failed = 1;
            print STDERR " !failed!\n" if ($VERBOSITY > 3);
        }
    }

    if (($contents) && ($contents eq $original)) {
        if ($ignore{$env}) { return(''); }
        # Generate picture
        if ($contents =~ s/$htmlborder_rx//o) {
            $attribs = $2; $border = (($4)? "$4" : 1)
        } elsif ($contents =~ s/$htmlborder_pr_rx//o) {
            $attribs = $2; $border = (($4)? "$4" : 1)
        }
        $contents = &process_undefined_environment($env, $id, $contents);
        $env_sub = "post_latex_$env_sub"; # i.e. post_latex_do_env_ENV
        if ( defined &$env_sub) {
            $contents = &$env_sub($contents)
        } elsif (($border||($attribs))&&($HTML_VERSION > 2.1)) {
            # The test uses $attribs, the value captured above and passed to
            # &make_table; it used to read $attributes, a variable this sub
            # neither declares nor assigns.
            $contents = &make_table( $border, $attribs, '', '', '', $contents );
        } else {
            # NOTE(review): both string literals below contain a raw line
            # break; this looks like markup that went missing from this copy
            # of the file -- confirm against the upstream source.
            $contents = join('',"
\n",$contents,"\n
") unless (!($contents)||($env =~ /^(tex2html_wrap|tex2html_nowrap|\w*math|eq\w*n)/o ));
        }
    }
    $contents;
}

#RRM: This reads the style information contained in the optional argument # to the \begin command. It is stored to be recovered later as an entry # within the automatically-generated style-sheet, if $USING_STYLES is set. # Syntax for this info is: #