From: Jonathan Blake Date: Tue, 26 Apr 2005 04:47:32 +0000 (+0000) Subject: Initial revision X-Git-Tag: 20121028~181 X-Git-Url: http://git.projectaon.org/?p=project-aon.git;a=commitdiff_plain;h=c560b69a3029efc08efe83f4672c98680dd2a605 Initial revision git-svn-id: https://projectaon.org/data/trunk@52 f6f3e2d7-ff33-0410-aaf5-b4bee2cdac11 --- diff --git a/scripts/addcorr.pl b/scripts/addcorr.pl new file mode 100755 index 0000000..3d449b6 --- /dev/null +++ b/scripts/addcorr.pl @@ -0,0 +1,148 @@ +#!/usr/bin/perl -w +# +# addcorr.pl +# +# addcorr.pl [inputCorrections HTMLfile(s)] +# +# Combines the operations of corrtohtml.pl, sortcorrhtml.pl, and +# mergecorrhtml.pl into one simple command line. Anything that needs +# to do anything more complex than this should use the three utilities +# separately. +# +# The book code will be obtained from the input HTML filename. E.g. +# 01fftd-changes.html provides the book code 01fftd. If for some reason +# the input HTML filename doesn't hold the book code in the first +# characters before the hyphen, use the three separate utilities. +# +# Anything that isn't specified on the command line will be prompted for. +# +# This program uses the most used pattern of using these three utilities. +# +# corrtohtml.pl -v -b bookCode inputCorrections +# | sortcorrhtml.pl -v -s -b bookCode +# | mergecorrhtml.pl inputHTML +# > outputHTML +# +# This utility also has the side effect of creating a backup copy of +# the inputHTML file. +# +###################################################################### +use strict; + +my $programName = "addcorr"; +my $usage = "$programName [inputCorrections HTMLfile(s)]\n"; + +unless( -d $ENV{AONPATH} ) { die "\$AONPATH environment variable doesn't point to a directory"; } + +my $convert = $ENV{AONPATH} . "/bin/corrtohtml.pl"; +die( "Cannot find executable file \"$convert\"" ) unless( -x $convert ); +my $sort = $ENV{AONPATH} . 
"/bin/sortcorrhtml.pl"; +die( "Cannot find executable file \"$sort\"" ) unless( -x $sort ); +my $merge = $ENV{AONPATH} . "/bin/mergecorrhtml.pl"; +die( "Cannot find executable file \"$merge\"" ) unless( -x $merge ); +my $copy = "/bin/cp"; +die( "Cannot find executable file \"$copy\"" ) unless( -x $copy ); + +my $optsProcessed = 0; +my $bookCode = ""; +my $inCorr = ""; +my $inHTML = ""; +my $outHTML = ""; +my $verbose = 0; + +my %books = ( + '01fftd' => 1, + '02fotw' => 1, + '03tcok' => 1, + '04tcod' => 1, + '05sots' => 1, + '06tkot' => 1, + '07cd' => 1, + '08tjoh' => 1, + '09tcof' => 1, + '10tdot' => 1, + '11tpot' => 1, + '12tmod' => 1, + '13tplor' => 1, + '14tcok' => 1, + '15tdc' => 1, + '16tlov' => 1, + '17tdoi' => 1, + '18dotd' => 1, + '19wb' => 1, + '20tcon' => 1, + '21votm' => 1, + '22tbos' => 1, + '23mh' => 1, + '24rw' => 1, + '25totw' => 1, + '26tfobm' => 1, + '27v' => 1, + '28thos' => 1, + '01gstw' => 1, + '02tfc' => 1, + '03btng' => 1, + '04wotw' => 1, + '01hh' => 1, + '02smr' => 1, + '03oz' => 1, + '04cc' => 1, + 'tmc' => 1, + 'rh' => 1 +); + +###################################################################### + +while( $#ARGV > -1 && not $optsProcessed ) { + my $commandLineItem = shift @ARGV; + if( $commandLineItem eq "--help" ) { + print $usage and exit; + } + elsif( $commandLineItem eq "-v" ) { + $verbose = 1; + } + else { + unshift @ARGV, $commandLineItem; + $optsProcessed = 1; + } +} + +if( $#ARGV > -1 ) { + $inCorr = shift @ARGV or die "Couldn't get input corrections\n$usage"; + $inHTML = shift @ARGV or die "Couldn't get input HTML\n$usage"; +} +else { + while( $inCorr eq "" ) { + print "Corrections File: "; + $inCorr = <>; + chomp( $inCorr ); + } + while( $inHTML eq "" ) { + print "Input HTML File: "; + $inHTML = <>; + chomp( $inHTML ); + } +} + +while( $inHTML ne "" ) { + die( "Cannot find corrections file \"$inCorr\"" ) unless( -f $inCorr ); + die( "Cannot read corrections file \"$inCorr\"" ) unless( -r $inCorr ); + die( "Cannot find HTML file 
\"$inHTML\"" ) unless( -f $inHTML ); + die( "Cannot read HTML file \"$inHTML\"" ) unless( -r $inHTML ); + die( "Cannot write to HTML file \"$inHTML\"" ) if( -f $inHTML && ! -w $inHTML ); + + $bookCode = $inHTML; + $bookCode =~ s{^([[:lower:][:digit:]]+)-.*$}{$1}; + die( "Unknown book code \"$bookCode\" (obtained from \"$inHTML\")" ) unless( exists $books{ $bookCode } ); + + print "Bookcode: $bookCode\n" if( $verbose ); + # leave backup untouched while putting output in original filename + print qx{$copy $inHTML $inHTML.backup}; + $outHTML = $inHTML; + $inHTML = "$inHTML.backup"; + + print qx{ $convert -v -b $bookCode $inCorr | $sort -v -s -b $bookCode | $merge $inHTML >$outHTML }; + + $inHTML = ""; + $inHTML = shift @ARGV if( $#ARGV > -1 ); +} diff --git a/scripts/corrtohtml.pl b/scripts/corrtohtml.pl new file mode 100755 index 0000000..4ede10c --- /dev/null +++ b/scripts/corrtohtml.pl @@ -0,0 +1,431 @@ +#!/usr/bin/perl -w + +use strict; + +my $programName = 'corrtohtml'; +my $usage = "$programName [options] [inputFile [inputFile2 ...]]\n" . + "\t-b bookCode convert unspecified corrections to this book\n" . + "\t-o outputFile\n" . + "\t-i editorsInitials\n" . + + "\t-s strips book information\n" . 
+ "\t-v verbose reporting\n"; + +my $optsProcessed = 0; +my $outFile = ""; +my $editorInitials = ""; +my $stripBookInfo = 0; +my $verbose = 0; +my $bookCode = ""; +my $bookCodeReport = ""; + +while( $#ARGV > -1 && not $optsProcessed ) { + my $commandLineItem = shift @ARGV; + if( $commandLineItem eq "-b" ) { + $bookCode = shift @ARGV or die $usage; + &validateBookCode( $bookCode ) or die( "Error ($programName): unrecognized bookcode on command line \"$bookCode\"" ); + } + elsif( $commandLineItem eq "-o" ) { + $outFile = shift @ARGV or die $usage; + } + elsif( $commandLineItem eq "-i" ) { + $editorInitials = shift @ARGV or die $usage; + } + elsif( $commandLineItem eq "-s" ) { + $stripBookInfo = 1; + } + elsif( $commandLineItem eq "-v" ) { + $verbose = 1; + } + elsif( $commandLineItem eq "--help" ) { + print $usage and exit; + } + else { + unshift @ARGV, $commandLineItem; + $optsProcessed = 1; + } +} + +my @lines = <>; +my $document = ""; +my %sectionDocLookup = ( + '_unknown' => '_unknown', + 'toc' => 'toc', + 'title' => 'title', + 'dedicate' => 'dedicate', + 'acknwldg' => 'acknwldg', + 'credits' => 'acknwldg', + 'coming' => 'coming', + 'tssf' => 'tssf', + 'gamerulz' => 'gamerulz', + 'discplnz' => 'discplnz', + 'camflage' => 'discplnz', + 'hunting' => 'discplnz', + 'sixthsns' => 'discplnz', + 'tracking' => 'discplnz', + 'healing' => 'discplnz', + 'wepnskll' => 'discplnz', + 'mndshld' => 'discplnz', + 'mndblst' => 'discplnz', + 'anmlknsp' => 'discplnz', + 'mindomtr' => 'discplnz', + 'mksumary' => 'discplnz', + 'anmlctrl' => 'discplnz', + 'curing' => 'discplnz', + 'invsblty' => 'discplnz', + 'psisurge' => 'discplnz', + 'psiscrn' => 'discplnz', + 'dvnation' => 'discplnz', + 'wpnmstry' => 'discplnz', + 'anmlmstr' => 'discplnz', + 'deliver' => 'discplnz', + 'assimila' => 'discplnz', + 'hntmstry' => 'discplnz', + 'pthmnshp' => 'discplnz', + 'kaisurge' => 'discplnz', + 'kaiscrn' => 'discplnz', + 'nexus' => 'discplnz', + 'gnosis' => 'discplnz', + 'magi' => 'discplnz', + 
'kalchemy' => 'discplnz', + 'powers' => 'powers', + 'lessmcks' => 'powers', + 'alchemy' => 'powers', + 'sorcery' => 'powers', + 'enchant' => 'powers', + 'elementl' => 'powers', + 'prophecy' => 'powers', + 'psycmncy' => 'powers', + 'evcation' => 'powers', + 'highmcks' => 'powers', + 'thamtrgy' => 'powers', + 'telergy' => 'powers', + 'physirgy' => 'powers', + 'theurgy' => 'powers', + 'visionry' => 'powers', + 'necrmncy' => 'powers', + 'staff' => 'powers', + 'moonston' => 'powers', + 'equipmnt' => 'equipmnt', + 'howcarry' => 'equipmnt', + 'howmuch' => 'equipmnt', + 'howuse' => 'equipmnt', + 'cmbtrulz' => 'cmbtrulz', + 'evasion' => 'cmbtrulz', + 'lorecrcl' => 'lorecrcl', + 'lcbonus' => 'lorecrcl', + 'levels' => 'levels', + 'primate' => 'levels', + 'tutelary' => 'levels', + 'mentora' => 'levels', + 'scion' => 'levels', + 'archmstr' => 'levels', + 'prncpln' => 'levels', + 'imprvdsc' => 'imprvdsc', + 'guardian' => 'imprvdsc', + 'sunkght' => 'imprvdsc', + 'sunlord' => 'imprvdsc', + 'kaiwisdm' => 'kaiwisdm', + 'sage' => 'sage', + 'numbered' => 'numbered', + 'part1' => 'part1', + 'part2' => 'part2', + 'ill1' => 'ill1', + 'ill2' => 'ill2', + 'ill3' => 'ill3', + 'ill4' => 'ill4', + 'ill5' => 'ill5', + 'ill6' => 'ill6', + 'ill7' => 'ill7', + 'ill8' => 'ill8', + 'ill9' => 'ill9', + 'ill10' => 'ill10', + 'ill11' => 'ill11', + 'ill12' => 'ill12', + 'ill13' => 'ill13', + 'ill14' => 'ill14', + 'ill15' => 'ill15', + 'ill16' => 'ill16', + 'ill17' => 'ill17', + 'ill18' => 'ill18', + 'ill19' => 'ill19', + 'ill20' => 'ill20', + 'passing' => 'passing', + 'map' => 'map', + 'action' => 'action', + 'crsumary' => 'crsumary', + 'smevazn' => 'crsumary', + 'crtable' => 'crtable', + 'random' => 'random', + 'errata' => 'errata', + 'errintro' => 'errata', + 'errerr' => 'errata', + 'footnotz' => 'footnotz', + 'illstrat' => 'illstrat', + 'primill' => 'illstrat', + 'secill' => 'illstrat', + 'license' => 'license', + 'lic-pre' => 'license', + 'lic-1' => 'license', + 'lic-1-0' => 'license', + 'lic-1-1' 
=> 'license', + 'lic-1-2' => 'license', + 'lic-1-3' => 'license', + 'lic-1-4' => 'license', + 'lic-1-5' => 'license', + 'lic-1-6' => 'license', + 'lic-1-7' => 'license', + 'lic-2' => 'license', + 'lic-2-0' => 'license', + 'lic-2-1' => 'license', + 'lic-2-2' => 'license', + 'lic-2-3' => 'license', + 'lic-2-4' => 'license', + 'lic-2-5' => 'license', + 'lic-3' => 'license', + 'lic-3-0' => 'license', + 'lic-3-1' => 'license', + 'lic-4' => 'license', + 'lic-4-0' => 'license', + 'lic-5' => 'license', + 'lic-5-0' => 'license', + 'lic-6' => 'license', + 'lic-6-0' => 'license', + 'lic-6-1' => 'license' +); + +my %sectionTitleLookup = ( + '_unknown' => '_unknown', + 'toc' => 'Table of Contents', + 'title' => 'Title Page', + 'dedicate' => 'Dedication', + 'acknwldg' => 'Acknowledgements', + 'coming' => 'Of the Coming of Grey Star', + 'tssf' => 'The Story So Far . . .', + 'gamerulz' => 'The Game Rules', + 'discplnz' => '. . . Disciplines', + 'powers' => 'Magical Powers', + 'equipmnt' => 'Equipment', + 'cmbtrulz' => 'Rules for Combat', + 'lorecrcl' => 'Lore-circles of the Magnakai', + 'levels' => 'Levels of . . . Mastery', + 'imprvdsc' => 'Improved . . . Disciplines', + 'kaiwisdm' => '. . . 
Wisdom', + 'sage' => 'Sage Advice', + 'numbered' => 'Numbered Sections', + 'part1' => 'Part I', + 'part2' => 'Part II', + 'ill1' => 'Illustration 1', + 'ill2' => 'Illustration 2', + 'ill3' => 'Illustration 3', + 'ill4' => 'Illustration 4', + 'ill5' => 'Illustration 5', + 'ill6' => 'Illustration 6', + 'ill7' => 'Illustration 7', + 'ill8' => 'Illustration 8', + 'ill9' => 'Illustration 9', + 'ill10' => 'Illustration 10', + 'ill11' => 'Illustration 11', + 'ill12' => 'Illustration 12', + 'ill13' => 'Illustration 13', + 'ill14' => 'Illustration 14', + 'ill15' => 'Illustration 15', + 'ill16' => 'Illustration 16', + 'ill17' => 'Illustration 17', + 'ill18' => 'Illustration 18', + 'ill19' => 'Illustration 19', + 'ill20' => 'Illustration 20', + 'passing' => 'Passing of the Shianti', + 'map' => 'map', + 'action' => 'Action Chart', + 'crsumary' => 'Combat Rules Summary', + 'crtable' => 'Combat Results Table', + 'random' => 'Random Number Table', + 'errata' => 'Errata', + 'footnotz' => 'Footnotes', + 'illstrat' => 'Table of Illustrations', + 'license' => 'Project Aon License' +); + +if( $bookCode ne "" ) { + $bookCodeReport = " [$bookCode]"; +} +################################################################################ +# Normalize Lines and Whitespace + +foreach my $line (@lines) { + $line =~ tr/\n\r/ /; + $document .= $line; +} +$document =~ s/[[:space:]]{2,}/ /g; # collapse spaces +$document =~ s/(\(er?\)|\(ne?\)|\(ft?\)|\(ce\)|\(cn\)|\(cf\)|\(re\)|\(rn\)|\(rf\)|\(\?\??\))/\n$1/g; # break lines +$document =~ s/^[[:space:]]*\n//g; # remove blank lines +@lines = split( m/ *\n/, $document ); + +################################################################################ +# Translate + +my $commentRegex = qr{\[[[:space:]]*(([^[:space:]:]*)[[:space:]]*:)?[[:space:]]*([^]]*)\]}; +my $sectionNumberRegex = qr{^\(([^)][^)])*\) # type: $1 + [[:space:]]* + ([[:digit:]]*[[:alpha:]]+[[:space:]]+)? # book: $2 + ([[:digit:]]+) # section: $3 + (?:[[:space:]]+ + \#([[:digit:]]+))? 
# issue: $4 + [[:space:]]*: + (.*?) # correction: $5 + [[:space:]]*$}x; +my $sectionIDRegex = qr{^\(([^)][^)])*\) # type: $1 + [[:space:]]* + ([[:digit:]]*[[:alpha:]]+[[:space:]]+)? # book: $2 + ([^:[:space:]]*) # section: $3 + (?:[[:space:]]+ + \#([[:digit:]]+))? # issue: $4 + [[:space:]]*: + (.*?) # correction: $5 + [[:space:]]*$}x; + +foreach my $line (@lines) { + $line =~ s{&} {&}g; # escape for HTML + $line =~ s{<} {<}g; # " + $line =~ s{>} {>}g; # " + + while( $line =~ m{$commentRegex} ) { + if( (not defined( $2 )) || $2 eq "" ) { + $line =~ s{$commentRegex}{
$3
}; + } else { + my $initials = lc( $2 ); + $line =~ s{$commentRegex}{
$3
}; + } + if( $3 =~ m/^[[:space:]]*$/ ) { + warn( "Warning ($programName)$bookCodeReport: empty comment found\n" ); + } + } + + if( $line =~ m{$sectionNumberRegex} ) { + my $book = ""; + if( defined $2 ) { + $book = lc( $2 ); + &validateBookCode( $book ) or die( "Error ($programName)$bookCodeReport: unrecognized bookcode in input corrections \"$book\"" ); + } + elsif( $bookCode ) { + $book = $bookCode; + warn( "Warning ($programName)$bookCodeReport: entry with unspecified book coerced to $bookCode: $line\n" ); + } + + my $issue = ""; + if( defined $4 ) { $issue = $4; } + my $caseFoldSection = lc( $3 ); + if( $book ne "" && not $stripBookInfo ) { + $line =~ s{$sectionNumberRegex}{
$caseFoldSection #$issue:$5
\n}; + } + else { + $line =~ s{$sectionNumberRegex}{
$caseFoldSection #$issue:$5
\n}; + } + } + elsif( $line =~ m{$sectionIDRegex} ) { + my $caseFoldSection = lc( $3 ); + exists $sectionDocLookup{$caseFoldSection} && defined $sectionDocLookup{$caseFoldSection} + or die( "Error ($programName)$bookCodeReport: don\'t understand section ID \"$caseFoldSection\" in $line" ); + exists $sectionTitleLookup{$sectionDocLookup{$caseFoldSection}} && defined $sectionTitleLookup{$sectionDocLookup{$caseFoldSection}} + or die( "Error ($programName)$bookCodeReport: section ID \"$caseFoldSection\" doesn\'t have an associated title" ); + + my $book = ""; + if( defined $2 ) { + $book = $2; + chomp( $book ); + &validateBookCode( $book ) or die( "Error ($programName)$bookCodeReport: unrecognized bookcode in input corrections \"$book\"" ); + } + elsif( $bookCode ) { + $book = $bookCode; + warn( "Warning ($programName)$bookCodeReport: entry with unspecified book coerced to $bookCode: $line\n" ); + } + + my $issue = ""; + if( defined $4 ) { $issue = $4; } + + if( $book ne "" && not $stripBookInfo ) { + $line =~ s{$sectionIDRegex}{
$sectionTitleLookup{$sectionDocLookup{$caseFoldSection}} \#$issue:$5
\n}; + } + else { + $line =~ s{$sectionIDRegex}{
$sectionTitleLookup{$sectionDocLookup{$caseFoldSection}} \#$issue:$5
\n}; + } + } + else { + die( "Error ($programName)$bookCodeReport: unable to parse line: $line\n" ); + } + + $line =~ s{class="\?\??"} {class="u"}; + $line =~ s{class="er"} {class="e"}; + $line =~ s{class="ne"} {class="n"}; + $line =~ s{class="ft"} {class="f"}; + + if( $line =~ m/(\(.{,4}\))|(\[.{,4}\])/ ) { + warn( "Warning ($programName)$bookCodeReport: possible malformed correction entry: $line\n" ); + } +} + +################################################################################ +# Output Results + +if( $outFile ne "" ) { + open( OUTFILE, ">$outFile" ) or die( "Error ($programName)$bookCodeReport: Unable to open output file \"$outFile\" for writing: $!" ); + print OUTFILE @lines; + close( OUTFILE ); +} +else { + print @lines; +} + +################################################################################ +# Subroutines + +sub validateBookCode { + my ($bookCode) = @_; + + # bookCode typically has some space after real data + $bookCode =~ s{[[:space:]]+}{}g; + + my %books = ( + '01fftd' => 1, + '02fotw' => 1, + '03tcok' => 1, + '04tcod' => 1, + '05sots' => 1, + '06tkot' => 1, + '07cd' => 1, + '08tjoh' => 1, + '09tcof' => 1, + '10tdot' => 1, + '11tpot' => 1, + '12tmod' => 1, + '13tplor' => 1, + '14tcok' => 1, + '15tdc' => 1, + '16tlov' => 1, + '17tdoi' => 1, + '18dotd' => 1, + '19wb' => 1, + '20tcon' => 1, + '21votm' => 1, + '22tbos' => 1, + '23mh' => 1, + '24rw' => 1, + '25totw' => 1, + '26tfobm' => 1, + '27v' => 1, + '28thos' => 1, + '01gstw' => 1, + '02tfc' => 1, + '03btng' => 1, + '04wotw' => 1, + '01hh' => 1, + '02smr' => 1, + '03oz' => 1, + '04cc' => 1, + 'tmc' => 1, + 'rh' => 1 + ); + + return exists $books{ $bookCode }; +} diff --git a/scripts/create-css-xhtml-single.pl b/scripts/create-css-xhtml-single.pl new file mode 100755 index 0000000..051c3ad --- /dev/null +++ b/scripts/create-css-xhtml-single.pl @@ -0,0 +1,142 @@ +#!/usr/local/bin/perl -w + +# See the following about font-size-adjust: +# 
http://www.w3.org/TR/REC-CSS2/fonts.html#font-size-props + +#"Verdana, Arial, Helvetica", "Georgia, Times New Roman, Times", "Courier New, Courier". + +#Commonly-installed typefaces on Macs and PCs +# (Windows then Mac) +# +# Serif: +# Georgia +# MS Serif +# Book Antiqua +# Times New Roman +# +# Georgia* +# New York +# Palatino +# Times +# +# Sans-serif: +# Verdana +# MS Sans Serif +# Arial +# Trebuchet +# +# Verdana* +# Geneva +# Helvetica +# Chicago +# +# Monospace: +# Courier New +# Courier + +( $bookPath, $textColor, $backgroundColor, $scrollbarBaseColor, $scrollbarTrackColor, $scrollbarArrowColor, $linkColor, $alinkColor, $hlinkBackgroundColor, $hlinkColor ) = @ARGV; + +open( CSSFILE, ">${bookPath}/main.css" ) or die( "Can\'t output to file: \"${bookPath}/main.css\"\n\t$!" ); + +print CSSFILE << "(END OF CSS)"; +html { + /* scrollbar properties are currently IE specific (24 Aug 2002) */ + scrollbar-base-color: ${scrollbarBaseColor}; + scrollbar-track-color: ${scrollbarTrackColor}; + scrollbar-arrow-color: ${scrollbarArrowColor}; +} + +html, body { + background-color: ${backgroundColor}; + color: ${textColor}; + font-family: Souvenir, Georgia, "Times New Roman", Times, serif; +} + +#footnotes { + font-size: 0.8em; +} + +hr { margin-left: 0px; } + +ul.unbulleted { list-style-type: none } +/* ul { list-style-type: none } */ + +b { font-weight: bold } + +h1, h2, h3, h4, h5, h6 { + margin-top: 0px; + border: 0px none; + padding: 0px; + text-align: left; +} + +div.numbered h3 { + text-align: center; +} + +div.glossary h3 { + text-align: center; +} + +:link:focus, :visited:focus { +} + +:link, :visited { + background-color: transparent; + color: ${linkColor}; + text-decoration: none; + font-weight: bold; +} + +:link:hover, :visited:hover { + background-color: ${hlinkBackgroundColor}; + color: ${hlinkColor}; + text-decoration: none; + font-weight: bold; +} + +:link:active, :visited:active { + background-color: transparent; + color: ${alinkColor}; + text-decoration: 
none; + font-weight: bold; +} + +dt { + font-weight: bold; +} + +.navigation, .signpost, .illustraion, .caption, .center { + text-align: center; +} + +.author { + text-align: center; + font-weight: bold; +} + +.dedication { + text-align: center; + font-style: italic; + font-weight: bold; + margin-top: 15ex; + margin-bottom: 15ex; +} + +.copyright { + text-align: center; + font-style: italic; +} + +.choice, .combat { + text-align: left; + margin-left: 15px; +} + +.smallcaps { + font-size: 0.8em; +} + +(END OF CSS) + +close CSSFILE; diff --git a/scripts/create-css.pl b/scripts/create-css.pl new file mode 100755 index 0000000..2fd3bc7 --- /dev/null +++ b/scripts/create-css.pl @@ -0,0 +1,208 @@ +#!/usr/bin/perl -w + +# See the following about font-size-adjust: +# http://www.w3.org/TR/REC-CSS2/fonts.html#font-size-props + +#"Verdana, Arial, Helvetica", "Georgia, Times New Roman, Times", "Courier New, Courier". + +#Commonly-installed typefaces on Macs and PCs +# (Windows then Mac) +# +# Serif: +# Georgia +# MS Serif +# Book Antiqua +# Times New Roman +# +# Georgia* +# New York +# Palatino +# Times +# +# Sans-serif: +# Verdana +# MS Sans Serif +# Arial +# Trebuchet +# +# Verdana* +# Geneva +# Helvetica +# Chicago +# +# Monospace: +# Courier New +# Courier + +#( $bookPath, $textColor, $backgroundColor, $scrollbarBaseColor, $scrollbarTrackColor, $scrollbarArrowColor, $linkColor, $alinkColor, $hlinkBackgroundColor, $hlinkColor, $hlinkLightBorderColor, $hlinkDarkBorderColor ) = @ARGV; + +( $bookPath, $textColor, $backgroundColor, $linkColor, $alinkColor, $hlinkBackgroundColor, $hlinkColor ) = @ARGV; + +open( CSSFILE, ">${bookPath}/main.css" ) or die( "Can\'t output to file: \"${bookPath}/main.css\"\n\t$!" 
); + +print CSSFILE << "(END OF CSS)"; +\@import url( more.css ); + +html, body { + background-color: ${backgroundColor}; + color: ${textColor}; + font-family: Souvenir, Georgia, "Times New Roman", Times, serif; +} + +#title { + position: absolute; + top: 0px; + left: 0px; + width: 550px; + height: 100px; + padding: 0px; + border: 0px none; + margin: 0px; +} + +#body { + position: absolute; + top: 95px; + left: 100px; + width: 450px; + padding: 0px; + border: 0px none; + margin: 0px; +} + +#footnotes { + font-size: 0.8em; +} + +hr { margin-left: 0px; } + +p, ol, ul, dl, blockquote { text-align: justify } + +ul.unbulleted { list-style-type: none } + +b { font-weight: bold } + +h1, h2, h3, h4, h5, h6 { + margin-top: 0px; + border: 0px none; + padding: 0px; + clear: left; + text-align: left; +} + +:link:focus, :visited:focus { +} + +:link, :visited { + background-color: transparent; + color: ${linkColor}; + text-decoration: none; + font-weight: bold; +} + +:link:hover, :visited:hover { + background-color: ${hlinkBackgroundColor}; + color: ${hlinkColor}; + text-decoration: none; + font-weight: bold; +} + +.navigation :link:hover, .navigation :visited:hover { + background-color: transparent; + color: ${hlinkColor}; + text-decoration: none; + font-weight: bold; +} + +:link:active, :visited:active { + background-color: transparent; + color: ${alinkColor}; + text-decoration: none; + font-weight: bold; +} + +dt { + font-weight: bold; +} + +.navigation, .signpost, .illustraion, .caption, .center { + text-align: center; +} + +.author { + text-align: center; + font-weight: bold; +} + +.dedication { + text-align: center; + font-style: italic; + font-weight: bold; + margin-top: 15ex; + margin-bottom: 15ex; +} + +.copyright { + text-align: center; + font-style: italic; +} + +.choice, .combat { + text-align: left; + margin-left: 15px; + width: 435px; +} + +.smallcaps { + font-size: 0.8em; +} +(END OF CSS) + +close CSSFILE; + +open( CSSFILE, ">${bookPath}/more.css" ) or die( 
"Can\'t output to file: \"${bookPath}/more.css\"\n\t$!" ); + +print CSSFILE << "(END OF MORE CSS)"; +p { + padding-top: 1px; + padding-bottom: 1px; +} + +div.numbered h3 { + position: absolute; + top: -56px; + left: 404px; + width: 39px; + height: 18pt; + margin: 0px; + border: 0px none; + padding: 0px; + font-size: 14pt; + background-color: transparent; + text-align: center; + vertical-align: middle; +} + +div.glossary h3 { + position: absolute; + top: -56px; + left: 404px; + width: 39px; + height: 18pt; + margin: 0px; + border: 0px none; + padding: 0px; + font-size: 14pt; + background-color: transparent; + text-align: center; + vertical-align: middle; +} + +img.accent { + margin-top: 5px; + margin-right: 10px; + margin-bottom: 5px; +} +(END OF MORE CSS) + +close CSSFILE; diff --git a/scripts/create-pdacss.pl b/scripts/create-pdacss.pl new file mode 100755 index 0000000..47c2a24 --- /dev/null +++ b/scripts/create-pdacss.pl @@ -0,0 +1,88 @@ +#!/usr/local/bin/perl -w + +( $bookPath, $textColor, $backgroundColor, $linkColor, $alinkColor ) = @ARGV; + +open( CSSFILE, ">${bookPath}/main.css" ) or die( "Can\'t output to file: \"${bookPath}/main.css\"\n\t$!" 
); + +print CSSFILE << "(END OF CSS)"; +html, body { + background-color: ${backgroundColor}; + color: ${textColor}; + font-family: Souvenir, Times, serif; +} + +#footnotes { + font-size: 0.8em; +} + +p, ol, ul, dl, blockquote { text-align: justify } + +ul.unbulleted { list-style-type: none } + +b { font-weight: bold } + +h1, h2, h3, h4, h5, h6 { + margin-top: 0px; + border: 0px none; + padding: 0px; + text-align: left; +} + +:link:focus, :visited:focus { +} + +:link, :visited { + background-color: transparent; + color: ${linkColor}; +/* text-decoration: none;*/ + font-weight: bold; +} + +:link:hover, :visited:hover { +} + +:link:active, :visited:active { + background-color: transparent; + color: ${alinkColor}; +/* text-decoration: none;*/ + font-weight: bold; +} + +dt { + font-weight: bold; +} + +.navigation, .signpost, .illustraion, .caption, .center { + text-align: center; +} + +.author { + text-align: center; + font-weight: bold; +} + +.dedication { + text-align: center; + font-style: italic; + font-weight: bold; + margin-top: 15ex; + margin-bottom: 15ex; +} + +.copyright { + text-align: center; + font-style: italic; +} + +.choice, .combat { + text-align: left; + margin-left: 5%; +} + +.smallcaps { + font-size: 0.8em; +} + +(END OF CSS) + +close CSSFILE; diff --git a/scripts/frontfilter.pl b/scripts/frontfilter.pl new file mode 100755 index 0000000..61b0184 --- /dev/null +++ b/scripts/frontfilter.pl @@ -0,0 +1,30 @@ +#!/usr/bin/perl + +while( $ARGV[ 0 ] ) { + $infile = shift @ARGV; + + @lines = ( ); + open( INFILE, "<$infile" ) or die "Bad input file \"$infile.\": $!"; + @lines = ; + close INFILE; + + foreach $line (@lines) { + my $oldline = $line; + $line =~ s/(\.\.\.|\.\s\.\s\.)/\&ellips\;/g; + $line =~ tr/\t/ /; + $line =~ s/\s{2,}/ /g; + $line =~ s/\&\s/\&\; /g; + $line =~ tr/\"\`\222\221/\'/; + $line =~ s/(Random\sNumber\sTable)/$1<\/a>/gi; + $line =~ s/(COMBAT\sSKILL|CLOSE\sCOMBAT\sSKILL|ENDURANCE|WILLPOWER|\bCS\b|\bEP\b)([^<])/$1<\/typ>$2/g; + $line =~ 
s/(Action\sCharts?)/$1<\/a>/gi; + # \222 and \221 are some form of funky right and + # left quotes not present in ascii (of course) + $line =~ tr/\227/-/; + # \227 is an em or en dash + + $line =~ s/^\s*(.*)\s*$/$1\n/; + } + + print @lines; +} diff --git a/scripts/gb-convert-entities.pl b/scripts/gb-convert-entities.pl new file mode 100755 index 0000000..34eb27c --- /dev/null +++ b/scripts/gb-convert-entities.pl @@ -0,0 +1,9 @@ +#!/usr/bin/perl + +while( <> ) { + if( /"[^">]*&([^;]+);/ && $1 !~ /^link/ ) { + warn "Entity $1 in attribute"; + } + s{\&(apos|nbsp|iexcl|cent|pound|curren|yen|brvbar|sect|uml|copy|ordf|laquo|not|shy|reg|macr|deg|plusmn|sup2|sup3|acute|micro|para|middot|cedil|sup1|ordm|raquo|frac14|frac12|frac34|iquest|Agrave|Aacute|Acirc|Atilde|Auml|Aring|AElig|Ccedil|Egrave|Eacute|Ecirc|Euml|Igrave|Iacute|Icirc|Iuml|ETH|Ntilde|Ograve|Oacute|Ocirc|Otilde|Ouml|times|Oslash|Ugrave|Uacute|Ucirc|Uuml|Yacute|THORN|szlig|agrave|aacute|acirc|atilde|auml|aring|aelig|ccedil|egrave|eacute|ecirc|euml|igrave|iacute|icirc|iuml|eth|ntilde|ograve|oacute|ocirc|otilde|ouml|divide|oslash|ugrave|uacute|ucirc|uuml|yacute|thorn|yuml|ampersand|lsquot|rsquot|ldquot|rdquot|minus|endash|emdash|ellips|lellips|blankline|percent|thinspace);}{}g; + print; +} diff --git a/scripts/gbfixencoding.pl b/scripts/gbfixencoding.pl new file mode 100755 index 0000000..d6d91c5 --- /dev/null +++ b/scripts/gbfixencoding.pl @@ -0,0 +1,122 @@ +#!/usr/bin/perl -Tw +# +# Uses ANSI color escapes to highlight text and for cursor movement +# + +use strict; + +my $usage = "Usage:\n\tgbfixquotes.pl INFILE OUTFILE\n"; + +my $lineNumber = 1; + +my ($infile, $outfile); + +if( $#ARGV == 1 ) { + $infile = shift @ARGV; + $outfile = shift @ARGV; +} +else { + die $usage; +} + +if( $infile =~ m{(^.*$)} && -f $1 ) { + open( INFILE, "<$1" ) or die "Error: unable to read from \"$infile\": $!\n"; +} +else { + die "Error: bad input file\n"; +} + +if( $outfile =~ m{(^.*$)} ) { + open( OUTFILE, ">$1" ) or die "Error: 
unable to write to \"$outfile\": $!\n"; +} +else { + die "Error: bad output file\n"; +} + +while( my $line = ) { + $line = &encodify( $line ); + + print OUTFILE $line; + ++$lineNumber; +} + +close OUTFILE; +close INFILE; + +################################################################################ + +sub encodify { + my ($line) = @_; + my $modified = $line; + my $replacements = 0; + + if( $modified =~ s{ ([[:space:]]) \& ([[:space:]]) }{$1\&ersand;$2}xg ) { $replacements = 1; } + if( $modified =~ s{ [[:space:]]+ - [[:space:]]+ }{\&emdash;}xg ) { $replacements = 1; } + if( $modified =~ s{ (?) }{$1\&emdash;$2}xg ) { $replacements = 1; } + if( $modified =~ s{ [[:space:]]* \227 [[:space:]]* }{\&emdash;}xg ) { $replacements = 1; } + if( $modified =~ s{ ([[:digit:]]) - ([[:digit:]]) }{$1\&endash;$2}xg ) { $replacements = 1; } + if( $modified =~ s{ [[:space:]]* \227 [[:space:]]* }{\&endash;}xg ) { $replacements = 1; } + if( $modified =~ s{ > [[:space:]]* \. [[:space:]]* \. ([[:space:]]* \.)? }{>\&lellips;}xg ) { $replacements = 1; } + if( $modified =~ s{ [[:space:]]* \. [[:space:]]* \. ([[:space:]]* \.)? }{\&ellips;}xg ) { $replacements = 1; } + if( $modified =~ s{ () \1 }{\&thinspace;}xg ) { $replacements = 1; } + if( $modified =~ s{ \' }{\&thinspace;\'}xg ) { $replacements = 1; } + if( $modified =~ s{ \' }{\'\&thinspace;}xg ) { $replacements = 1; } + if( $modified =~ s{ __+ }{\&blankline;}xg ) { $replacements = 1; } + if( $modified =~ s{\%}{\&percent;}xg ) { $replacements = 1; } + + if( $replacements ) { + print "\033[2J"; + print &highlight( $line ) . 
"\n"; + print &highlight( $modified ); + print "\033[7m (a)ccept, (r)eject, (q)uit: [accept]\033[0m >> "; + + my $response = ; + chomp $response; + if( $response =~ m/^[aA]$/ || $response eq "" ) { $line = $modified; } + elsif( $response =~ m/^[qQ]$/ ) { + print OUTFILE $line; + while( $line = ) { + print OUTFILE $line; + } + exit( 0 ); + } + return $line; + } + else { return $line; } +} + +sub highlight { + my ($text) = @_; + + my $start = "\033[45;30m"; + my $encodedStart = "\033[40;35m"; + my $dashStart = "\033[46;30m"; + my $encodedDashStart = "\033[40;36m"; + my $stop = "\033[0m"; + + $text =~ s{^[[:space:]]+}{}g; + $text =~ s{ ([[:space:]]) \& ([[:space:]]) }{$1$start\&$stop$2}xg; + $text =~ s{(\&ersand;)}{$encodedStart$1$stop}g; + $text =~ s{(\&emdash;)}{$encodedDashStart$1$stop}g; + $text =~ s{ [[:space:]] (\&) [[:space:]] }{$dashStart$1$stop}xg; + $text =~ s{ ([[:space:]]+ - [[:space:]]+) }{$dashStart$1$stop}xg; + $text =~ s{ (? ([[:space:]]* \. [[:space:]]* \. ([[:space:]]* \.)?) }{>$start$1$stop}xg; + $text =~ s{(\&ellips;)}{$encodedStart$1$stop}g; + $text =~ s{ ([[:space:]]* \. [[:space:]]* \. ([[:space:]]* \.)?) 
}{$start$1$stop}xg; + $text =~ s{(\&thinspace;)}{$encodedStart$1$stop}g; + $text =~ s{ ( \1) }{$start$1$stop}xg; + $text =~ s{ ( \') }{$start$1$stop}xg; + $text =~ s{ (\' ) }{$start$1$stop}xg; + $text =~ s{(\&blankline;)}{$encodedStart$1$stop}g; + $text =~ s{ (__+) }{$start$1$stop}xg; + $text =~ s{(\&percent;)}{$encodedStart$1$stop}g; + $text =~ s{(\%)}{$start$1$stop}xg; + + return $text; +} diff --git a/scripts/gbfixquotes.pl b/scripts/gbfixquotes.pl new file mode 100755 index 0000000..22e08d4 --- /dev/null +++ b/scripts/gbfixquotes.pl @@ -0,0 +1,165 @@ +#!/usr/bin/perl -Tw +# +# Uses ANSI color escapes to highlight text and for cursor movement +# + +use strict; + +my $usage = "Usage:\n gbfixquotes.pl [options] INFILE OUTFILE\n\t-f \tforce attempted fixes in malformed places\n\t-s LINES\tskip lines\n"; + +my $lineNumber = 1; +my $skipLines = 1; + +my $tags = qr{(p)|(choice)}; +my $quoteMarks = qr{['`\221-\224]}; +my $notQuoteMarks = qr{[^'`\221-\224]}; +my $terminalPunctuation = qr{[.?!,]}; +my $notTerminalPunctuation = qr{[^.?!,]}; + +my $spellNames = qr{(lightning[[:space:]]+hand)|(splinter)|(flameshaft)|(halt[[:space:]]+missile)|(strength)|(penetrate)|(energy[[:space:]]+grasp)|(slow[[:space:]]+fall)|(breathe[[:space:]]+water)|(power[[:space:]]+glyph)|(hold[[:space:]]+enemy)|(teleport)|(see[[:space:]]+illusion)|(mind[[:space:]]+charm)|(net)|(counterspell)|(sense[[:space:]]+evil)|(invisible[[:space:]]+fist)|(levitation)}i; + +my ($infile, $outfile); + +my $optsProcessed = 0; +my $forced = 0; + +while( $#ARGV > -1 && not $optsProcessed ) { + my $commandLineItem = shift @ARGV; + if( $commandLineItem eq "-f" ) { + $forced = 1; + } + elsif( $commandLineItem eq "-s" ) { + $skipLines = shift @ARGV or die $usage; + } + else { + unshift @ARGV, $commandLineItem; + $optsProcessed = 1; + } +} + +if( $#ARGV == 1 ) { + $infile = shift @ARGV; + $outfile = shift @ARGV; +} +else { + die $usage; +} + +if( $infile =~ m{(^.*$)} && -f $1 ) { + open( INFILE, "<$1" ) or die "Error: 
unable to read from \"$infile\": $!\n"; +} +else { + die "Error: bad input file\n"; +} + +if( $outfile =~ m{(^.*$)} ) { + open( OUTFILE, ">$1" ) or die "Error: unable to write to \"$outfile\": $!\n"; +} +else { + die "Error: bad output file\n"; +} + +while( my $line = ) { + if( $skipLines > $lineNumber ) { } + elsif( $line =~ m{<($tags)[[:space:]>]} ) { + my $tagName = $1; + unless( $line =~ m{} ) { + printWarning( "Warning ($lineNumber): <$tagName> found without , skipping tests for current line\n", $line ); + } + elsif( $line =~ m{${quoteMarks}} ) { + $line = "ify( $line ); + } + } + elsif( $forced && $line =~ m{${quoteMarks}} ) { + $line = "ify( $line ); + } + elsif( $line =~ m{} ) { + printWarning( "Warning ($lineNumber): found without <$1>\n", $line ); + } + elsif( $line =~ m{($quoteMarks)}x ) { + printWarning( "Warning ($lineNumber): unescaped quotation character \"$1\" found outside tested context\n", $line ); + } + + print OUTFILE $line; + ++$lineNumber; +} + +close OUTFILE; +close INFILE; + +################################################################################ + +sub quotify { + my ($line) = @_; + my $modified = $line; + $modified =~ s{ + $quoteMarks + ($spellNames) + $quoteMarks + } + {$1}xg; + $modified =~ s{ + ([[:space:]]) + $quoteMarks + ([[:alpha:]]+) + $quoteMarks + ([[:space:]]) + } + {$1$2$3}xg; + $modified =~ s{ + ([[:alpha:]][[:space:]]*) + $quoteMarks + ([[:space:]]*[[:alpha:]]) + } + {$1\'$2}xg; + $modified =~ s{ + ${quoteMarks} + (${notTerminalPunctuation}+? + ${terminalPunctuation}) + ${quoteMarks} + } + {$1}xg; + $modified =~ s{ + ${quoteMarks} + (${notQuoteMarks}+?) + ${quoteMarks} + } + {$1}xg; + print "\033[2J"; + print &highlight( $line ) . 
"\n"; + print &highlight( $modified ); + print "\033[7m (a)ccept, (r)eject, (q)uit: [accept]\033[0m >> "; + my $response = ; + chomp $response; + if( $response =~ m/^[aA]$/ || $response eq "" ) { $line = $modified; } + elsif( $response =~ m/^[qQ]$/ ) { + print OUTFILE $line; + while( $line = ) { + print OUTFILE $line; + } + exit( 0 ); + } + return $line; +} + +sub highlight { + my ($text) = @_; + + $text =~ s{^[[:space:]]+}{}; + $text =~ s{()}{\033[1;36m$1\033[0m}g; + $text =~ s{()}{\033[1;34m$1\033[0m}g; + $text =~ s{()}{\033[1;35m$1\033[0m}g; + $text =~ s{(\')}{\033[1;32m$1\033[0m}g; + $text =~ s{($quoteMarks)}{\033[1m\033[43m$1\033[0m}g; + + return $text; +} + +sub printWarning { + my ($message, $line) = @_; + print "\033[2J"; + print "$message\n"; + print &highlight( $line ) . "\n"; + print "\033[7m [continue]\033[0m >> "; + my $response = ; +} diff --git a/scripts/gblint.pl b/scripts/gblint.pl new file mode 100755 index 0000000..faaa8aa --- /dev/null +++ b/scripts/gblint.pl @@ -0,0 +1,315 @@ +#!/usr/bin/perl -Tw +# +# Each new section id requires adding it to the list (e.g. improved +# disciplines). 
+# +############################################################################### +use strict; + +my $endOfDTD = 0; + +my %sectionDocLookup = ( + '_unknown' => '_unknown', + 'toc' => 'toc', + 'title' => 'title', + 'dedicate' => 'dedicate', + 'acknwldg' => 'acknwldg', + 'credits' => 'acknwldg', + 'coming' => 'coming', + 'tssf' => 'tssf', + 'gamerulz' => 'gamerulz', + 'discplnz' => 'discplnz', + 'camflage' => 'discplnz', + 'hunting' => 'discplnz', + 'sixthsns' => 'discplnz', + 'tracking' => 'discplnz', + 'healing' => 'discplnz', + 'wepnskll' => 'discplnz', + 'mndshld' => 'discplnz', + 'mndblst' => 'discplnz', + 'anmlknsp' => 'discplnz', + 'mindomtr' => 'discplnz', + 'mksumary' => 'discplnz', + 'anmlctrl' => 'discplnz', + 'curing' => 'discplnz', + 'invsblty' => 'discplnz', + 'psisurge' => 'discplnz', + 'psiscrn' => 'discplnz', + 'dvnation' => 'discplnz', + 'wpnmstry' => 'discplnz', + 'anmlmstr' => 'discplnz', + 'deliver' => 'discplnz', + 'assimila' => 'discplnz', + 'hntmstry' => 'discplnz', + 'pthmnshp' => 'discplnz', + 'kaisurge' => 'discplnz', + 'kaiscrn' => 'discplnz', + 'nexus' => 'discplnz', + 'gnosis' => 'discplnz', + 'magi' => 'discplnz', + 'kalchemy' => 'discplnz', + 'powers' => 'powers', + 'lessmcks' => 'powers', + 'alchemy' => 'powers', + 'sorcery' => 'powers', + 'enchant' => 'powers', + 'elementl' => 'powers', + 'prophecy' => 'powers', + 'psycmncy' => 'powers', + 'evcation' => 'powers', + 'highmcks' => 'powers', + 'thamtrgy' => 'powers', + 'telergy' => 'powers', + 'physirgy' => 'powers', + 'theurgy' => 'powers', + 'visionry' => 'powers', + 'necrmncy' => 'powers', + 'staff' => 'powers', + 'moonston' => 'powers', + 'equipmnt' => 'equipmnt', + 'howcarry' => 'equipmnt', + 'howmuch' => 'equipmnt', + 'howuse' => 'equipmnt', + 'cmbtrulz' => 'cmbtrulz', + 'evasion' => 'cmbtrulz', + 'lorecrcl' => 'lorecrcl', + 'lcbonus' => 'lorecrcl', + 'levels' => 'levels', + 'primate' => 'levels', + 'tutelary' => 'levels', + 'mentora' => 'levels', + 'scion' => 'levels', + 
'archmstr' => 'levels', + 'prncpln' => 'levels', + 'imprvdsc' => 'imprvdsc', + 'guardian' => 'imprvdsc', + 'sunkght' => 'imprvdsc', + 'sunlord' => 'imprvdsc', + 'kaiwisdm' => 'kaiwisdm', + 'sage' => 'sage', + 'numbered' => 'numbered', + 'passing' => 'passing', + 'part1' => 'part1', + 'part2' => 'part2', + 'map' => 'map', + 'action' => 'action', + 'crsumary' => 'crsumary', + 'smevazn' => 'crsumary', + 'crtable' => 'crtable', + 'random' => 'random', + 'errata' => 'errata', + 'errintro' => 'errata', + 'errerr' => 'errata', + 'footnotz' => 'footnotz', + 'illstrat' => 'illstrat', + 'primill' => 'illstrat', + 'secill' => 'illstrat', + 'license' => 'license', + 'lic-pre' => 'license', + 'lic-1' => 'license', + 'lic-1-0' => 'license', + 'lic-1-1' => 'license', + 'lic-1-2' => 'license', + 'lic-1-3' => 'license', + 'lic-1-4' => 'license', + 'lic-1-5' => 'license', + 'lic-1-6' => 'license', + 'lic-1-7' => 'license', + 'lic-2' => 'license', + 'lic-2-0' => 'license', + 'lic-2-1' => 'license', + 'lic-2-2' => 'license', + 'lic-2-3' => 'license', + 'lic-2-4' => 'license', + 'lic-2-5' => 'license', + 'lic-3' => 'license', + 'lic-3-0' => 'license', + 'lic-3-1' => 'license', + 'lic-4' => 'license', + 'lic-4-0' => 'license', + 'lic-5' => 'license', + 'lic-5-0' => 'license', + 'lic-6' => 'license', + 'lic-6-0' => 'license', + 'lic-6-1' => 'license', +); + +my $errorCount = 0; +my $maxErrorCount = 0; +my $skipLines = 0; +my $initials = "??"; +my $useCorr = 0; + +while( $#ARGV > -1 && $ARGV[ 0 ] =~ /^-/ ) { + if( $ARGV[ 0 ] eq "-e" && $#ARGV > 0 ) { + shift @ARGV; + $maxErrorCount = shift @ARGV; + } + elsif( $ARGV[ 0 ] eq "-s" && $#ARGV > 0 ) { + shift @ARGV; + $skipLines = shift @ARGV; + } + elsif( $ARGV[ 0 ] eq "-i" && $#ARGV > 0 ) { + shift @ARGV; + $initials = shift @ARGV; + } + elsif( $ARGV[ 0 ] eq "--use-corr" ) { + shift @ARGV; + $useCorr = 1; + } +} + +my $lineNumber = 1; +my $currentSection = "_unknown"; + +while( my $line = <> ) { + my @section = ( $line =~ /]+id="([^"]*)"/g ); 
+ if( $#section > 0 ) { die( "Multiple sections begin at line $lineNumber\n" ); } + elsif( $#section == 0 ) { + if( $section[ 0 ] =~ /^sect[[:digit:]]+$/ ) { + $currentSection = $section[ 0 ]; + } + else { + $currentSection = $sectionDocLookup{$section[ 0 ]}; + } + } + + if( $skipLines >= $lineNumber ) { + ++$lineNumber; + next; + } + + ##### Unescaped Characters + if( $line =~ /[\200-\377]/ ) { + if( $line =~ /\221/ ) { &printError( "ne", $currentSection, $lineNumber, "unescaped left single quotation mark(s)", "\221", "... or \'" ); } + if( $line =~ /\222/ ) { &printError( "ne", $currentSection, $lineNumber, "unescaped right single quotation mark(s)", "\222", "... or \'" ); } + if( $line =~ /\223/ ) { &printError( "ne", $currentSection, $lineNumber, "unescaped left double quotation mark(s)", "\223", "..." ); } + if( $line =~ /\224/ ) { &printError( "ne", $currentSection, $lineNumber, "unescaped right double quotation mark(s)", "\224", "..." ); } + if( $line =~ /\226/ ) { &printError( "ne", $currentSection, $lineNumber, "unescaped endash(es)", "\226", "&endash;" ); } + if( $line =~ /\227/ ) { &printError( "ne", $currentSection, $lineNumber, "unescaped emdash(es)", "\227", "&emdash;" ); } + if( $line =~ /([ \200-\220 \225 \230-\377 ])/gx ) { + &printError( "ne", $currentSection, $lineNumber, "unescaped non-ASCII character(s); first found only", "$1" ); + } +} + if( $line =~ /'/ ) { &printError( "ne", $currentSection, $lineNumber, "unescaped apostrophe(s)", "'", "\' or ..." ); } + if( $line =~ /`/ ) { &printError( "ne", $currentSection, $lineNumber, "backtick(s)", "`", "\' or ..." 
); } + + # tab + + if( $line =~ /\t/ ) { &printError( "ne", $currentSection, $lineNumber, "TAB character found; convert to equivalent SPACEs" ); } + + # ampersand + if( $line =~ /\&\s/ ) { &printError( "ne", $currentSection, $lineNumber, "possible malformed ampersand or escape sequence", "&", "&ersand;" ); } + + # emdash + if( $line =~ /\s-\s/ ) { &printError( "ne", $currentSection, $lineNumber, "probable malformed emdash", " - ", "\&emdash;" ); } + if( $line =~ /(?)/ ) { &printError( "ne", $currentSection, $lineNumber, "probable malformed emdash", "--", "\&emdash;" ); } + + # endash + if( $line =~ /([0-9])-([0-9]+)(?![^<]+>)/ ) { &printError( "ne", $currentSection, $lineNumber, "possible malformed endash", "$1-$2", "$1\&endash;$2" ); } + + # ellipsis + if( $line =~ /(\.\s*\.(\s*\.)?)/ ) { &printError( "ne", $currentSection, $lineNumber, "possible malformed ellipsis", "$1", "\&ellips; or \&lellips;" ); } + if( $line =~ /(\&ellips;)([^<[:space:]])/ ) { &printError( "ne", $currentSection, $lineNumber, "\&ellips; without space afterwards", "$1$2", "\&ellips; $2" ); } + if( $line =~ /([[:space:]]\&ellips;)/ ) { &printError( "ne", $currentSection, $lineNumber, "\&ellips; with preceding space", "$1", "\&ellips;" ); } + if( $line =~ /([^>])(\&lellips;)/ ) { &printError( "ne", $currentSection, $lineNumber, "possible \&lellips; used in place of \&ellips;", "$1$2", "$1\&ellips;" ); } + if( $line =~ /(>\&ellips;)/ ) { &printError( "ne", $currentSection, $lineNumber, "possible \&ellips; used in place of \&lellips;", "$1", ">\&lellips;" ); } + + # thinspace + if( $line =~ m{()\1} ) { &printError( "ne", $currentSection, $lineNumber, "probable candidate for thinspace", "$1$1", "$1\&thinspace;$1" ); } + if( $line =~ m{()(\')} || $line =~ m{(\')()} ) { &printError( "ne", $currentSection, $lineNumber, "probable canidate for thinspace", "$1$2", "$1\&thinspace;$2" ); } + + # blankline + if( $line =~ /(__+)/ ) { &printError( "ne", $currentSection, $lineNumber, "probable candidate for 
blankline", "$1", "\&blankline;" ); } + + # percent + # It should be safe to assume that there will be a "]>" at the end of + # internal DTD subset. Previous to the end of the internal DTD subset + # "%" has special meaning and shouldn't be detected. + if( $line =~ /]>/ ) { $endOfDTD = 1; } + if( $endOfDTD && $line =~ /\%/ ) { &printError( "ne", $currentSection, $lineNumber, "possible candidate for percent", "\%", "\&percent;" ); } + + ##### OCR Errors + + if( $line =~ m{([^.?!:);>])} ) { &printError( "??", $currentSection, $lineNumber, "possible missing punctuation", "$1" ); } + if( $line =~ /((?[^<]*-[[:space:]]/ ) { &printError( "??", $currentSection, $lineNumber, "possible retained end-of-line hyphen(s)" ); } + + ##### Obsolete Markup + + if( $line =~ /\&lsquot;/ ) { &printError( "ne", $currentSection, $lineNumber, "probable obsolete markup", "\&lsquot;", "" ); } + if( $line =~ /\&rsquot;/ ) { &printError( "ne", $currentSection, $lineNumber, "probable obsolete markup", "\&rsquot;", "" ); } + if( $line =~ /\&ldquot;/ ) { &printError( "ne", $currentSection, $lineNumber, "probable obsolete markup", "\&ldquot;", "" ); } + if( $line =~ /\&rdquot;/ ) { &printError( "ne", $currentSection, $lineNumber, "probable obsolete markup", "\&rdquot;", "" ); } + if( $line =~ /\"/ ) { &printError( "ne", $currentSection, $lineNumber, "possible obsolete markup", "\"", " or " ); } + if( $line =~ /(\&link.[^;]+;)/ ) { &printError( "ne", $currentSection, $lineNumber, "probable obsolete markup", "$1", "use instead" ); } + if( $line =~ /\&([^[:space:]]+);/ ) { + unless( $1 =~ /^(?:link|inclusion)/ ) { + &printError( "ne", $currentSection, $lineNumber, "probable obsolete markup", "\&$1\;", "" ); + } + } + if( $line =~ /(]*) class="footnote"(.*?)>)/ ) { &printError( "ne", $currentSection, $lineNumber, "obsolete markup", "$1", "" ); } + + ##### Character Attributes + if( $line =~ /[^>]((CLOSE\s+)?COMBAT\sSKILL)/ || $line =~ /((CLOSE\s+)?COMBAT\sSKILL)[^<]/ ) { + &printError( "ne", 
$currentSection, $lineNumber, "possible missing markup", "$1", "$1" ); + } + if( $line =~ /[^>](ENDURANCE)/ || $line =~ /(ENDURANCE)[^<]/ ) { + &printError( "ne", $currentSection, $lineNumber, "possible missing markup", "ENDURANCE", "ENDURANCE" ); + } + if( $line =~ /[^>](WILLPOWER)/ || $line =~ /(WILLPOWER)[^<]/ ) { + &printError( "ne", $currentSection, $lineNumber, "possible missing markup", "WILLPOWER", "WILLPOWER" ); + } + + ##### Links + if( $line =~ /[^>](random[[:space:]]+number[[:space:]]+table)/i ) { + &printError( "ne", $currentSection, $lineNumber, "possible missing markup", "$1", "$1" ); + } + if( $line =~ /[^>](action[[:space:]]+charts?)/i ) { + &printError( "ne", $currentSection, $lineNumber, "possible missing markup", "$1", "$1" ); + } + + ##### Others + if( $line =~ m{ +# [list goes here] +# +# +# Typical usage would be in concert with corrtohtml and sortcorrhtml: +# +# corrtohtml | sorcorrhtml | mergecorrhtml -b +# +# Output will appear on standard out which would usually be redirected to file. +# +################################################################################ + +use strict; + +my $programName = 'mergecorrhtml'; +my $usage = "$programName [options] inputHTML\n" . + "\t-b bookcode\n" . + "\t-u include unspecified book\n" . 
+ "\t-v verbose reporting\n"; + +my $htmlRegex; +my $corrRegex; +my $issueRegex; +my $markerRegex; + +################################################################################ +# Process command line + +my $optsProcessed = 0; +my $inFile; +my $bookCode = ""; +my $bookCodeReport = ""; +my $includeUnspecifiedBook = 0; +my $verbose = 0; + +while( $#ARGV > -1 && not $optsProcessed ) { + my $commandLineItem = shift @ARGV; + if( $commandLineItem eq "-b" ) { + $bookCode = shift @ARGV or die $usage; + } + elsif( $commandLineItem eq "-u" ) { + $includeUnspecifiedBook = 1; + } + elsif( $commandLineItem eq "-v" ) { + $verbose = 1; + } + elsif( $commandLineItem eq "--help" ) { + print $usage and exit; + } + else { + unshift @ARGV, $commandLineItem; + $optsProcessed = 1; + } +} + +if( $verbose ) { + $bookCodeReport = " [$bookCode]"; +} + +$inFile = shift @ARGV or die $usage; + +$issueRegex = qr{[^#]+?(?:#([[:digit:]]+))}; + +$htmlRegex = qr{^()())()?)(); +close INFILE; + +#### Consume preamble + +while( $#lines > -1 && $lines[ 0 ] !~ m{^[[:space:]]*[[:space:]]*$} ) { + print shift @lines; +} +print shift @lines if( $#lines > -1 ); + +my @inHTML; + +#### Get good stuff + +while( $#lines > -1 && $lines[ 0 ] !~ m{^[[:space:]]*[[:space:]]*$} ) { + if( $lines[ 0 ] =~ m/$htmlRegex/ ) { + push( @inHTML, shift @lines ); + } + elsif( $lines[ 0 ] =~ m/$markerRegex/ ) { + shift @lines; + } + elsif( $lines[ 0 ] =~ m/^[[:space:]]*$/ ) { + shift @lines; + } + else { + die( "Error ($programName)$bookCodeReport: unrecognized input HTML: " . $lines[ 0 ] . "\n" ); + } +} + +my @inCorr; +while( my $corr = <> ) { + push( @inCorr, $corr ) if( $corr =~ m{$corrRegex} ); +} + +################################################################################ +# Merge! 
+ +my @sectSortOrder = &getSectSortOrder( ); + +foreach my $section (@sectSortOrder) { + my $issue; + print "\n"; + while( $#inHTML > -1 && $inHTML[ 0 ] =~ m/$htmlRegex$section\.htm${issueRegex}/ ) { + $issue = $4; + while( $#inCorr > -1 && $inCorr[ 0 ] =~ m/$corrRegex$section\.htm${issueRegex}/ && $issue eq $4 ) { + my $corr = shift @inCorr; + my $comm = ""; + if( $corr !~ m{^.+?:[[:space:]]*]+?class="[^"]*cm} ) { warn( "Warning ($programName)$bookCodeReport: discarding data in issue comment: $corr" ); } + while( $corr =~ s{^.*?(]+?class="[^"]*cm[^>]+>.*?)}{} ) { + $comm .= $1; + } + $inHTML[ 0 ] =~ s{$}{$comm} + } + print shift @inHTML; + } + while( $#inCorr > -1 && $inCorr[ 0 ] =~ m/$corrRegex$section\.htm/ ) { + my $corr = shift @inCorr; + $corr =~ s{$corrRegex}{$1 -1 ) { + warn( "Warning ($programName)$bookCodeReport: input HTML probably out of order\n\tor unrecognized section--error near:\n\t" . $inHTML[ 0 ] . "\n" ); + print @inHTML; +} +if( $#inCorr > -1 ) { + warn( "Warning ($programName)$bookCodeReport: input corrections probably out of order\n\tor unrecognized section--error near:\n\t" . $inCorr[ 0 ] . 
"\n" ); + print @inCorr; +} + +print @lines; + + +################################################################################ +################################################################################ +# Subroutines + +sub getSectSortOrder { + return qw{ + _unknown + toc + title + dedicate + acknwldg + coming + tssf + gamerulz + discplnz + powers + equipmnt + cmbtrulz + lorecrcl + levels + imprvdsc + kaiwisdm + sage + numbered + part1 + sect1 + sect2 + sect3 + sect4 + sect5 + sect6 + sect7 + sect8 + sect9 + sect10 + sect11 + sect12 + sect13 + sect14 + sect15 + sect16 + sect17 + sect18 + sect19 + sect20 + sect21 + sect22 + sect23 + sect24 + sect25 + sect26 + sect27 + sect28 + sect29 + sect30 + sect31 + sect32 + sect33 + sect34 + sect35 + sect36 + sect37 + sect38 + sect39 + sect40 + sect41 + sect42 + sect43 + sect44 + sect45 + sect46 + sect47 + sect48 + sect49 + sect50 + sect51 + sect52 + sect53 + sect54 + sect55 + sect56 + sect57 + sect58 + sect59 + sect60 + sect61 + sect62 + sect63 + sect64 + sect65 + sect66 + sect67 + sect68 + sect69 + sect70 + sect71 + sect72 + sect73 + sect74 + sect75 + sect76 + sect77 + sect78 + sect79 + sect80 + sect81 + sect82 + sect83 + sect84 + sect85 + sect86 + sect87 + sect88 + sect89 + sect90 + sect91 + sect92 + sect93 + sect94 + sect95 + sect96 + sect97 + sect98 + sect99 + sect100 + sect101 + sect102 + sect103 + sect104 + sect105 + sect106 + sect107 + sect108 + sect109 + sect110 + sect111 + sect112 + sect113 + sect114 + sect115 + sect116 + sect117 + sect118 + sect119 + sect120 + sect121 + sect122 + sect123 + sect124 + sect125 + sect126 + sect127 + sect128 + sect129 + sect130 + sect131 + sect132 + sect133 + sect134 + sect135 + sect136 + sect137 + sect138 + sect139 + sect140 + sect141 + sect142 + sect143 + sect144 + sect145 + sect146 + sect147 + sect148 + sect149 + sect150 + sect151 + sect152 + sect153 + sect154 + sect155 + sect156 + sect157 + sect158 + sect159 + sect160 + sect161 + sect162 + sect163 + sect164 + sect165 + 
sect166 + sect167 + sect168 + sect169 + sect170 + sect171 + sect172 + sect173 + sect174 + sect175 + sect176 + sect177 + sect178 + sect179 + sect180 + sect181 + sect182 + sect183 + sect184 + sect185 + sect186 + sect187 + sect188 + sect189 + sect190 + sect191 + sect192 + sect193 + sect194 + sect195 + sect196 + sect197 + sect198 + sect199 + part2 + sect200 + sect201 + sect202 + sect203 + sect204 + sect205 + sect206 + sect207 + sect208 + sect209 + sect210 + sect211 + sect212 + sect213 + sect214 + sect215 + sect216 + sect217 + sect218 + sect219 + sect220 + sect221 + sect222 + sect223 + sect224 + sect225 + sect226 + sect227 + sect228 + sect229 + sect230 + sect231 + sect232 + sect233 + sect234 + sect235 + sect236 + sect237 + sect238 + sect239 + sect240 + sect241 + sect242 + sect243 + sect244 + sect245 + sect246 + sect247 + sect248 + sect249 + sect250 + sect251 + sect252 + sect253 + sect254 + sect255 + sect256 + sect257 + sect258 + sect259 + sect260 + sect261 + sect262 + sect263 + sect264 + sect265 + sect266 + sect267 + sect268 + sect269 + sect270 + sect271 + sect272 + sect273 + sect274 + sect275 + sect276 + sect277 + sect278 + sect279 + sect280 + sect281 + sect282 + sect283 + sect284 + sect285 + sect286 + sect287 + sect288 + sect289 + sect290 + sect291 + sect292 + sect293 + sect294 + sect295 + sect296 + sect297 + sect298 + sect299 + sect300 + sect301 + sect302 + sect303 + sect304 + sect305 + sect306 + sect307 + sect308 + sect309 + sect310 + sect311 + sect312 + sect313 + sect314 + sect315 + sect316 + sect317 + sect318 + sect319 + sect320 + sect321 + sect322 + sect323 + sect324 + sect325 + sect326 + sect327 + sect328 + sect329 + sect330 + sect331 + sect332 + sect333 + sect334 + sect335 + sect336 + sect337 + sect338 + sect339 + sect340 + sect341 + sect342 + sect343 + sect344 + sect345 + sect346 + sect347 + sect348 + sect349 + sect350 + sect351 + sect352 + sect353 + sect354 + sect355 + sect356 + sect357 + sect358 + sect359 + sect360 + sect361 + sect362 + sect363 + sect364 + 
sect365 + sect366 + sect367 + sect368 + sect369 + sect370 + sect371 + sect372 + sect373 + sect374 + sect375 + sect376 + sect377 + sect378 + sect379 + sect380 + sect381 + sect382 + sect383 + sect384 + sect385 + sect386 + sect387 + sect388 + sect389 + sect390 + sect391 + sect392 + sect393 + sect394 + sect395 + sect396 + sect397 + sect398 + sect399 + sect400 + ill1 + ill2 + ill3 + ill4 + ill5 + ill6 + ill7 + ill8 + ill9 + ill10 + ill11 + ill12 + ill13 + ill14 + ill15 + ill16 + ill17 + ill18 + ill19 + ill20 + passing + map + action + crsumary + crtable + random + errata + footnotz + illstrat + license + }; +} diff --git a/scripts/sortcorrhtml.pl b/scripts/sortcorrhtml.pl new file mode 100755 index 0000000..1c5745e --- /dev/null +++ b/scripts/sortcorrhtml.pl @@ -0,0 +1,625 @@ +#!/usr/bin/perl -w +# +# Sort Correction HTML +# +# Sorts input correction HTML. Can also filter out corrections for undesired +# books. +# +# Assumes that corrections appear one per line. Good practice would be to pipe +# the output of corrtohtml.pl to this function: +# +# corrtohtml.pl corrections | sortcorrhtml.pl +# +################################################################################ + +use strict; + +my $progName = "sortcorrhtml"; +my $usage = "$progName [options]\n" . + "\t-b bookcode exclude corrections specified for other books\n" . + "\t-s strip book tags\n" . 
+ "\t-v verbose reporting\n"; + +my $optsProcessed = 0; +my $bookCode = ""; +my $bookCodeReport = ""; +my $stripBookInfo = 0; +my $includeUnspecifiedBook = 0; +my $verbose = 0; + +while( $#ARGV > -1 && not $optsProcessed ) { + my $commandLineItem = shift @ARGV; + if( $commandLineItem eq "-b" ) { + $bookCode = shift @ARGV or die $usage; + } + elsif( $commandLineItem eq "-s" ) { + $stripBookInfo = 1; + } + elsif( $commandLineItem eq "-v" ) { + $verbose = 1; + } + elsif( $commandLineItem eq "--help" ) { + print $usage and exit; + } + else { + unshift @ARGV, $commandLineItem; + $optsProcessed = 1; + } +} + +if( $verbose ) { + $bookCodeReport = " [$bookCode]"; +} + +my @bookSortOrder = &getBookSortOrder( ); +my @sectSortOrder = &getSectSortOrder( ); + +my %buckets = ( ); + +my $corrRegex = qr{^.+?()?)}; +} +else { + $stripRegex = qr{}; +} + +my @lines = <>; +my $maxIssue = 0; + +foreach my $line (@lines) { + if( $bookCode ne "" ) { + ($line =~ m{$bookRegex}) or next; # skip other books + } + + ($line =~ m{$corrRegex}) or die( "Error ($progName)$bookCodeReport: unrecognized correction: $line\n" ); + my( $book, $sect, $issue ) = ($2, $3, $4); + $book = "unknown" unless defined $book; + $issue = "unassigned" unless defined $issue && $issue ne ""; + $buckets{$book} = { } unless exists $buckets{$book}; + $buckets{$book}->{$sect} = { } unless exists $buckets{$book}->{$sect}; + $buckets{$book}->{$sect}->{$issue} = [ ] unless exists $buckets{$book}->{$sect}->{$issue}; + + if( $stripBookInfo ) { + $line =~ s/$stripRegex//; + } + + if( $issue ne "unassigned" && $issue > $maxIssue ) { $maxIssue = $issue; } + + push @{$buckets{$book}->{$sect}->{$issue}}, $line; +} + +foreach my $bookKey (keys %buckets) { + my $found = 0; + foreach my $book (@bookSortOrder) { + if( $bookKey eq $book ) { $found = 1; } + } + unless( $found ) { die( "Error ($progName)$bookCodeReport: unknown book code: $bookKey\n" ); } + + foreach my $sectKey (keys %{$buckets{$bookKey}}) { + $found = 0; + foreach my 
$sect (@sectSortOrder) { + if( $sectKey eq $sect ) { $found = 1; } + } + unless( $found ) { die( "Error ($progName)$bookCodeReport: unknown section: $sectKey\n" ); } + } +} + +for( my $i = 0; $i <= $#bookSortOrder; ++$i ) { + for( my $j = 0; $j <= $#sectSortOrder; ++$j ) { + for( my $k = 0; $k <= $maxIssue; ++$k ) { + print @{$buckets{$bookSortOrder[$i]}->{$sectSortOrder[$j]}->{$k}} if exists $buckets{$bookSortOrder[$i]} && exists $buckets{$bookSortOrder[$i]}->{$sectSortOrder[$j]} && exists $buckets{$bookSortOrder[$i]}->{$sectSortOrder[$j]}->{$k}; + } + while( $#{$buckets{$bookSortOrder[$i]}->{$sectSortOrder[$j]}->{'unassigned'}} > -1 ) { + print shift @{$buckets{$bookSortOrder[$i]}->{$sectSortOrder[$j]}->{'unassigned'}}; + } + } +} + +################################################################################ + +sub getBookSortOrder { + return qw{ + unknown + 01fftd + 02fotw + 03tcok + 04tcod + 05sots + 06tkot + 07cd + 08tjoh + 09tcof + 10tdot + 11tpot + 12tmod + 13tplor + 14tcok + 15tdc + 16tlov + 17tdoi + 18dotd + 19wb + 20tcon + 21votm + 22tbos + 23mh + 24rw + 25totw + 26tfobm + 27v + 28thos + 01gstw + 02tfc + 03btng + 04wotw + 01hh + 02smr + 03oz + 04cc + tmc + rh + }; +} + +sub getSectSortOrder { + return qw{ + _unknown + toc + title + dedicate + acknwldg + coming + tssf + gamerulz + discplnz + powers + equipmnt + cmbtrulz + lorecrcl + levels + imprvdsc + kaiwisdm + sage + numbered + part1 + sect1 + sect2 + sect3 + sect4 + sect5 + sect6 + sect7 + sect8 + sect9 + sect10 + sect11 + sect12 + sect13 + sect14 + sect15 + sect16 + sect17 + sect18 + sect19 + sect20 + sect21 + sect22 + sect23 + sect24 + sect25 + sect26 + sect27 + sect28 + sect29 + sect30 + sect31 + sect32 + sect33 + sect34 + sect35 + sect36 + sect37 + sect38 + sect39 + sect40 + sect41 + sect42 + sect43 + sect44 + sect45 + sect46 + sect47 + sect48 + sect49 + sect50 + sect51 + sect52 + sect53 + sect54 + sect55 + sect56 + sect57 + sect58 + sect59 + sect60 + sect61 + sect62 + sect63 + sect64 + sect65 
+ sect66 + sect67 + sect68 + sect69 + sect70 + sect71 + sect72 + sect73 + sect74 + sect75 + sect76 + sect77 + sect78 + sect79 + sect80 + sect81 + sect82 + sect83 + sect84 + sect85 + sect86 + sect87 + sect88 + sect89 + sect90 + sect91 + sect92 + sect93 + sect94 + sect95 + sect96 + sect97 + sect98 + sect99 + sect100 + sect101 + sect102 + sect103 + sect104 + sect105 + sect106 + sect107 + sect108 + sect109 + sect110 + sect111 + sect112 + sect113 + sect114 + sect115 + sect116 + sect117 + sect118 + sect119 + sect120 + sect121 + sect122 + sect123 + sect124 + sect125 + sect126 + sect127 + sect128 + sect129 + sect130 + sect131 + sect132 + sect133 + sect134 + sect135 + sect136 + sect137 + sect138 + sect139 + sect140 + sect141 + sect142 + sect143 + sect144 + sect145 + sect146 + sect147 + sect148 + sect149 + sect150 + sect151 + sect152 + sect153 + sect154 + sect155 + sect156 + sect157 + sect158 + sect159 + sect160 + sect161 + sect162 + sect163 + sect164 + sect165 + sect166 + sect167 + sect168 + sect169 + sect170 + sect171 + sect172 + sect173 + sect174 + sect175 + sect176 + sect177 + sect178 + sect179 + sect180 + sect181 + sect182 + sect183 + sect184 + sect185 + sect186 + sect187 + sect188 + sect189 + sect190 + sect191 + sect192 + sect193 + sect194 + sect195 + sect196 + sect197 + sect198 + sect199 + part2 + sect200 + sect201 + sect202 + sect203 + sect204 + sect205 + sect206 + sect207 + sect208 + sect209 + sect210 + sect211 + sect212 + sect213 + sect214 + sect215 + sect216 + sect217 + sect218 + sect219 + sect220 + sect221 + sect222 + sect223 + sect224 + sect225 + sect226 + sect227 + sect228 + sect229 + sect230 + sect231 + sect232 + sect233 + sect234 + sect235 + sect236 + sect237 + sect238 + sect239 + sect240 + sect241 + sect242 + sect243 + sect244 + sect245 + sect246 + sect247 + sect248 + sect249 + sect250 + sect251 + sect252 + sect253 + sect254 + sect255 + sect256 + sect257 + sect258 + sect259 + sect260 + sect261 + sect262 + sect263 + sect264 + sect265 + sect266 + sect267 + 
sect268 + sect269 + sect270 + sect271 + sect272 + sect273 + sect274 + sect275 + sect276 + sect277 + sect278 + sect279 + sect280 + sect281 + sect282 + sect283 + sect284 + sect285 + sect286 + sect287 + sect288 + sect289 + sect290 + sect291 + sect292 + sect293 + sect294 + sect295 + sect296 + sect297 + sect298 + sect299 + sect300 + sect301 + sect302 + sect303 + sect304 + sect305 + sect306 + sect307 + sect308 + sect309 + sect310 + sect311 + sect312 + sect313 + sect314 + sect315 + sect316 + sect317 + sect318 + sect319 + sect320 + sect321 + sect322 + sect323 + sect324 + sect325 + sect326 + sect327 + sect328 + sect329 + sect330 + sect331 + sect332 + sect333 + sect334 + sect335 + sect336 + sect337 + sect338 + sect339 + sect340 + sect341 + sect342 + sect343 + sect344 + sect345 + sect346 + sect347 + sect348 + sect349 + sect350 + sect351 + sect352 + sect353 + sect354 + sect355 + sect356 + sect357 + sect358 + sect359 + sect360 + sect361 + sect362 + sect363 + sect364 + sect365 + sect366 + sect367 + sect368 + sect369 + sect370 + sect371 + sect372 + sect373 + sect374 + sect375 + sect376 + sect377 + sect378 + sect379 + sect380 + sect381 + sect382 + sect383 + sect384 + sect385 + sect386 + sect387 + sect388 + sect389 + sect390 + sect391 + sect392 + sect393 + sect394 + sect395 + sect396 + sect397 + sect398 + sect399 + sect400 + ill1 + ill2 + ill3 + ill4 + ill5 + ill6 + ill7 + ill8 + ill9 + ill10 + ill11 + ill12 + ill13 + ill14 + ill15 + ill16 + ill17 + ill18 + ill19 + ill20 + passing + map + action + crsumary + crtable + random + errata + footnotz + illstrat + license + }; +} diff --git a/scripts/xmlize.pl b/scripts/xmlize.pl new file mode 100755 index 0000000..5befc7f --- /dev/null +++ b/scripts/xmlize.pl @@ -0,0 +1,213 @@ +#!/usr/bin/perl +# +# xmlize.pl +# +# $Id$ +# +# $Log$ +# Revision 1.1 2005/04/26 04:48:03 jonathan.blake +# Initial revision +# +# Revision 1.2 2002/10/20 05:46:31 jblake +# Fixed a couple of bugs in the handling of carriage returns and +# added support for 
Freeway Warrior's CLOSE COMBAT SKILL. +# +# Revision 1.1 2002/10/20 03:18:35 jblake +# Initial revision +# +# +# 21 Jun 2002 - Fixed bug in tagging of character-attributes +# 06 May 2002 - Incorporated functionality of xmlize-all +# 20 Oct 2001 - Added more spaces to xmlized lines to make 'em purty +# in the final product +# 19 May 2001 - Updated to conform to new gamebook DTD +# 17 Apr 2001 - Repurposed as XMLizer +# 24 Feb 2001 - Commented out some of the filtering in favor of +# placing it in a separate script +# 22 Feb 2000 - Added filtering for & +# Padding ENDURANCE in combat

with spaces +# 21 Feb 2000 - Added filtering for \t +# 05 Feb 2000 - Added Action Chart linking +# Added [] centering +# Fixed the "A Giak" caps problem +# +###################################################################### + +#use strict; + +#### Subroutines + +sub xmlize { + my( $inline, $infile ) = @_; + + $inline =~ s/(\.\.\.|\.\s\.\s\.)/\&ellips\;/g; + $inline =~ tr/\t/ /; + $inline =~ s/\s{2,}/ /g; + $inline =~ s/\s+$//; + $inline =~ s/\&\s/\&\; /g; + $inline =~ tr/\"\`\222\221/\'/; + $inline =~ s/(Random\sNumber\sTable)/$1<\/a>/gi; + $inline =~ s/(Action\sCharts?)/$1<\/a>/gi; + # \222 and \221 are some form of funky right and + # left quotes not present in ascii (of course) + $inline =~ tr/\227/-/; + # \227 is an em or en dash + + $inline =~ s/^\s*(.*)\s*$/$1/; + + if( $inline =~ /^\*/ ) { + $inline =~ s/^\*\s*/

"; + } + elsif( $inline =~ /^\d+\)\s/ ) { + $inline =~ s/^\d+\)\s+/
    \n
  1. /; + $inline =~ s/\s*\d+\)\s+/<\/li>\n
  2. /g; + $inline .= "
  3. \n
"; + } + elsif( $inline =~ /^\<\!\-\-\spre\s\-\-\>/ ) { + $inline =~ s/^\<\!\-\-\spre\s\-\-\>//; + warn( "Warning: preformatted text in \"$infile\"\n" ); + } + elsif( $inline =~ /^.+:\s+CLOSE\sCOMBAT\sSKILL/ ) { + $inline =~ s/^(.+):\s+CLOSE\sCOMBAT\sSKILL\s+([0-9]+)\s+ENDURANCE\s+([0-9]+)/ $1<\/enemy>$2<\/enemy-attribute>$3<\/enemy-attribute><\/combat>/g; + } + elsif( $inline =~ /^.+:\s+COMBAT\sSKILL/ ) { + $inline =~ s/^(.+):\s+COMBAT\sSKILL\s+([0-9]+)\s+ENDURANCE\s+([0-9]+)/ $1<\/enemy>$2<\/enemy-attribute>$3<\/enemy-attribute><\/combat>/; + } + elsif( $inline =~ /^(.*)\b(return|turn|go)([a-zA-Z\s]+?to )(\d{1,3})/i ) { + $inline =~ s/^(.*)\b(return|turn|go)([a-zA-Z\s]+?to )(\d{1,3})(.*)/ $1$2$3$4<\/link-text>$5<\/choice>/i; + $inline =~ s/\s+<\/choice>/<\/choice>/; + } + elsif( $inline =~ /^\[/ ) { + $inline =~ s/\[(.*)\]/$1/; + $inline = " $inline"; + $inline =~ s/\s+<\/signpost>/<\/signpost>/; + } + elsif( $inline eq "" ) { + } + elsif( $inline =~ /^/ ) { + warn( "Warning: unknown comment \"$1\" in \"$infile\"\n" ); + } + else { + $inline = "

$inline

"; + $inline =~ s/\s+<\/p>/<\/p>/; + } + +# Interferes with selecting a combat paragraph if done earlier + $inline =~ s/(COMBAT\sSKILL|CLOSE\sCOMBAT\sSKILL|ENDURANCE|WILLPOWER|\bCS\b|\bEP\b)([^<])/$1<\/typ>$2/g; + + return $inline; +} + +#### Main Routine + +my $numberOfSections = shift @ARGV; + +print << "(End of XML Header)"; + + + + %xhtml.characters; + + + %general.links; + + %xhtml.links; + + + %general.inclusions; +]> + + + + + + [Insert Title] + + +
+ + +
+ +
+ + Title Page + + + + + + + +
+ Numbered Sections + + +(End of XML Header) + +for( my $sectionNumber = 1; $sectionNumber <= $numberOfSections; ++$sectionNumber ) { + + my $infile = "${sectionNumber}.txt"; + + open( INFILE, "<$infile" ) or die "Input file \"$infile\" is not readable.\n"; + + my @oldlines = ( ); + @oldlines = ; + + close INFILE; + + my $title = shift @oldlines; + my $section = shift @oldlines; + my $illustration = shift @oldlines; + chomp $illustration; + $illustration =~ s/^Illustration\s+(\d+)\s+/$1/; + $illustration =~ s/\r//g; + shift @oldlines if( $illustration ne "" ); + + my @newlines = ( "" ); + my $newline; + + # Parsing waits for an empty line to XMLize and store + # the preceding lines. + push( @oldlines, "" ) if( @oldlines[ $#oldlines ] ne "" ); + + foreach my $oldline (@oldlines) { + $oldline =~ s/\r|\n/ /g; + $oldline =~ s/^\s*(\S*)\s*$/$1/; + $oldline =~ s/\s\s/ /; + if( $oldline ne "" ) { + $newline .= (" " . $oldline); + } + else { + $newline = &xmlize( $newline, $infile ); + $newline .= "\n" if( $newline ne "" ); + push( @newlines, $newline ); + $newline = ""; + } + } + + print "\n\n
\n $sectionNumber\n\n \n"; + print @newlines; + print " \n
"; +} + +print << "(End of XML footer)"; + +
+
+ + + +
+
+
+(End of XML footer)