#!/usr/local/bin/perl # # arts2book # # Given a directory of articles, create a book.tex skeleton # file. This is done by reading from the book.template file, # which contains a tag, which it expands into the # article list. If no -paperdir argument is supplied, it is # assumed that the articles are in this directory. # use Cwd; $cwd = cwd() ; use lib $cwd ; use BOOKSTUFF ; $usg = "Usage: arts2book.pl [-paperdir dirname]\n" ; # Any command line switches? # while ($ARGV[0] =~ /^-/) { $_ = shift; if (/^-paperdir$/) { $paperdir = shift ; } else { $usg ; die "Unrecognized switch: $_\n" ; } } open( IN, "book.tex" ) || die "Cannot open book.tex output file.\n" ; undef $/ ; $\ = "\n" ; print OUT "% Don't edit this file!" ; print OUT "% Edit book.template instead.\n" ; $template= ; @lines = split /\n/m, $template ; foreach $line (@lines) { if( $line =~ // ) { &artlist( $paperdir ) ; } else { print OUT $line ; } } # Create a list of .tex files in directory $dirname. Parse each # for information to go into the book.tex file: # # filename # full author list, for table of contents # abbreviated author list, for page headings # abbreviated title, for page headings # # The so-called abbreviated items are actually the full-length # versions. You can edit them down if the page headings are # too long. # sub artlist { my( $dirname ) = @_ ; my( $file, @filelist ) ; if( defined( $dirname ) ) { @filelist = glob( "$dirname$pathsep*.$ext_in" ); } else { @filelist = glob( "*.$ext_in" ); } foreach $file (@filelist) { # Obtain file root name by stripping off both the extension # and any leading path elements. ( $fileroot = $file ) =~ s/\.tex$// ; $fileroot =~ s/^.*$pathsep([^$pathsep]*)/$1/ ; # Open the article as a single string. open( ART, "<$file" ) || die "Cannot open article $file.\n" ; $art = ; # Remove comments, but be careful of \% characters (and # so be careful of "%" at the beginning of the line). # $art =~ s/((?!\\).%|^%).*\n//mg ; # Parse the article for \title and \author tags. Skip # files that have no title tag. # $title = &get_title( $art ) ; next unless( defined($title) ) ; ( $authors, $short_authors ) = &get_authors( $art ) ; # Print the information # print OUT "% $fileroot" ; print OUT "\\paper{$authors}{$short_authors}{$title}\\input $fileroot.$ext_out\n" ; print "\\paper{$authors}{$short_authors}{$title}\n" ; } } # get_title( $art ) # # Parse the article string $art and return the title. # Easy, since there can only be one. # Search down to \title{ and start building $title from # there. Append everything up to the first "}". If there # was a "{" in there, append a matching "}" to the title, # and repeat, appending to the next "}". # sub get_title{ my( $art ) = @_ ; my( $title ) ; #undef $title ; if( $art =~ /\\title{/m ) { $title = &to_closing_brace( $' ) ; } else { # No sense fooling with a .tex file without a title. # return( undef ) ; } # Take out line break characters. # $title =~ s/\n/ /g ; return( $title ) ; } # to_closing_brace( $str ) # # Given a string which starts immediately after an # opening brace, return the substring that extends # up to the matching, closing brace. # sub to_closing_brace{ my( $str ) = @_ ; my( $leftparcount, $result, $tmp ) ; undef $result ; while( $str =~ /([^}]*)}/m ) { $str = $' ; $result .= $1 ; $tmp = $1 ; $leftparcount = ( $tmp =~ s/{//g ) ; last if( $leftparcount == 0 ); $result .= "}" ; } return( $result ) ; } # get_authors( $art ) # # Parse the article string $art for author names. Return both # a complete author list for the table of contents, and a # shortened form suitable for page headings. The shortened form # uses only last names. # # The authors. Not so easy, since there can be several. # Search down to \author{ and start building $author from # there. Append everything up to the first "}". If there # was a "{" in there, append a matching "}" to $author, # and repeat, appending to the next "}". Same as for the # title, but repeat until all \author commands are gone. # sub get_authors{ my( $art ) = @_ ; my( $author, $authorcount, $authormacro_count, @authors, $short_author ) ; undef $author ; $authormacro_count = 0 ; while( $art =~ /\\author{/m ) { $art = $' ; $authormacro_count += 1 ; # If this isn't the first \author macro encountered, # append ", " to the current $author string. unless( $authormacro_count == 1 ) { $author .= ", " ; } $author .= &to_closing_brace( $' ) ; # Take out \altaffilmark{} macros # $author =~ s/\\altaffilmark{\d+}//g ; # Take out any affiliations in parentheses that may have # sneaked in. # $author =~ s/\([^)]*\)//g ; # Take out any "and" or "\&" separators that may be # in the existing string. # $author =~ s/\s*\band\b/,/g ; $author =~ s/\s*\\\&/,/g ; # Taking out "and" and "\&" might have left multiple, # space separated commas. Clean them up. # $author =~ s/,\s*,/,/g ; # Make sure there is one "and" in front of the last # author, if there are more than two. # @authors = split /,\s*/, $author ; $authorcount = scalar( @authors ) ; if( $authorcount == 1 ) { $author = &author_trim( $authors[0] ) ; $short_author = &author_shorten( $authors[0] ) ; } elsif( $authorcount == 2 ) { $author = &author_trim($authors[0]) . " and " . &author_trim($authors[1]) ; $short_author = &author_shorten($authors[0]) . " and " . &author_shorten($authors[1]) ; } elsif( $authorcount > 2 ) { $author = &author_trim( $authors[0] ); $short_author = &author_shorten( $authors[0] ) ; for( $i=1; $i<$authorcount-1; $i++ ) { $author = join ", ", $author, &author_trim( $authors[$i]) ; $short_author = join ", ", $short_author, &author_shorten( $authors[$i]) ; } $author = $author . ", and " . &author_trim ($authors[$authorcount-1] ) ; $short_author = $short_author . ", and " . &author_shorten( $authors[$authorcount-1] ) ; } # If 7 or more authors, short form is "first author et al." # if( $authorcount >= 7 ) { $short_author = &author_shorten( $authors[0] ) . " et al." ; } # Take out line breaks # $author =~ s/\n/ /g ; } return( $author, $short_author ) ; } # author_trim( $author ) # # Trim an author name down to initials, lastname, and # whatever "Jr.", "III", ... are called. # sub author_trim{ my( $author ) = @_ ; my( $mod ) ; undef $mod ; # Trim trailing white space # $author =~ s/\s+$// ; # Regularize other forms for a space # $author =~ s/\\ /~/g ; $author =~ s/ +/~/g ; $author =~ s/\s/~/g ; # Try making names other than the first into initials. # But save any trailing modifier first. # if( $author =~ /~(Jr\.?|Sr\.?|[IV]*)$/ ) { $author =~ s/$&// ; $mod = $& ; } $author =~ s/\b([A-Z])[^\.]*~/$1.~/g ; return( "$author" . "$mod" ) ; } # author_shorten( $author ) # # Shorten an author name down to just a last name. # But want "de Vries" "dela Pena" to be recognized as last # names. # sub author_shorten{ my( $author ) = @_ ; # Put author into canonical form with initials, lastname, and # trailing modifiers # $author = &author_trim( $author ) ; # Trim off any of a list of trailing modifiers: # $author =~ s/~(Jr\.?|Sr\.?|[IV]*)$// ; # Trim initials. Remember initials can be of the form J.-P. # Or Th. # while( $author =~ /^-?[A-Z][a-z]*\.~?/ ) { $author =~ s/$1// ; } return( $author ) ; }