#!/usr/bin/env perl
#
# xmlize.pl
#
######################################################################
use strict;
use warnings;
# Extension of the per-section input files; section N is read from "N.txt".
my $FILE_EXTENSION = 'txt';

#### Main Routine
#
# Usage: xmlize.pl maxSectionNumber [minSectionNumber]
#
# Reads the files "<min>.txt" .. "<max>.txt" from the current directory
# (min defaults to 1), converts each one with xmlize() and prints the
# result to standard output between a fixed header and footer.
die "xmlize.pl maxSectionNumber [minSectionNumber]\n" if $#ARGV < 0;
my $minSectionNumber = 1;
my $numberOfSections = shift @ARGV;
$minSectionNumber = shift @ARGV if $#ARGV > -1;

# NOTE(review): the header and footer text below contains no XML markup
# apart from the "]>" line; presumably the tags were lost at some point.
# Confirm against a pristine copy of the script before relying on it.
print << "(End of XML Header)";
%general.links;
%xhtml.links;
%general.inclusions;
]>
[Insert Title]
Title Page
Numbered Sections
(End of XML Header)

for( my $sectionNumber = $minSectionNumber; $sectionNumber <= $numberOfSections; ++$sectionNumber ) {
    my $infile = "${sectionNumber}.${FILE_EXTENSION}";

    # Three-argument open with a lexical handle: the file name can never be
    # mistaken for an open mode, and the handle does not leak as a global.
    open( my $in, '<', $infile ) or die "Input file \"$infile\" is not readable.\n";
    my @oldlines = <$in>;
    close $in;

    # The first two lines of every input file are discarded; the third is
    # examined as a possible "Illustration N" marker.
    shift @oldlines;
    shift @oldlines;
    my $illustration = shift @oldlines;
    # A file with fewer than three lines leaves this undefined.
    $illustration = "" unless defined $illustration;
    chomp $illustration;
    $illustration =~ s/^Illustration\s+(\d+)\s+/$1/;
    $illustration =~ s/\r//g;
    # A non-empty third line means one further line is skipped as well.
    shift @oldlines if( $illustration ne "" );

    my @newlines = ( "" );
    # Accumulates the text of the paragraph currently being collected.
    my $newline = "";

    # Parsing waits for an empty line to XMLize and store
    # the preceding lines, so make sure the input ends with one.
    push( @oldlines, "" ) if( @oldlines && $oldlines[ $#oldlines ] ne "" );

    foreach my $oldline (@oldlines) {
        $oldline =~ s/\r|\n/ /g;
        # Reduces a line holding at most one word to that bare word; in
        # particular a whitespace-only line becomes the empty string.
        $oldline =~ s/^\s*(\S*)\s*$/$1/;
        $oldline =~ s/\s\s/ /;
        if( $oldline ne "" ) {
            $newline .= (" " . $oldline);
        }
        else {
            # Blank line: the collected paragraph is complete.
            $newline = xmlize( $newline, $infile );
            $newline .= "\n" if( $newline ne "" );
            push( @newlines, $newline );
            $newline = "";
        }
    }

    print "\n\n \n $sectionNumber\n\n \n";
    print @newlines;
    print " \n ";
}

print << "(End of XML footer)";
(End of XML footer)
#### Subroutines
# xmlize( $inline, $infile )
#
# Converts one paragraph of plain text into its marked-up form.
#
#   $inline - the paragraph text; an undefined value is treated as "".
#   $infile - name of the file the text came from, used only in warnings.
#
# Returns the converted paragraph. The text is first normalised
# (ellipses and "& " removed, whitespace collapsed, quote and dash
# variants mapped to ASCII) and then classified by its leading content:
# "*" list, "N)" list, "<!-- pre -->" block, combat line, "turn to N"
# choice, "[...]" signpost, empty text, other comment, or ordinary
# paragraph. Warnings are issued for preformatted text and for
# unrecognised comments.
#
# NOTE(review): the replacement strings emit closing tags (</li>,
# </choice>, </enemy>, </typ>, ...) but no matching opening tags, and
# the clean-up substitutions for </signpost> and </p> have nothing to
# act on. Presumably the opening tags were lost from this copy of the
# script; confirm against a pristine version.
sub xmlize {
    my( $inline, $infile ) = @_;

    # Callers may hand over an undefined value; treat it as empty text
    # so the substitutions below do not raise warnings.
    $inline = "" unless defined $inline;

    # --- Normalisation ------------------------------------------------
    $inline =~ s/[[:space:]]*(\.\.\.|\.\s\.\s\.)[[:space:]]*//g;
    $inline =~ tr/\t/ /;
    $inline =~ s/\s{2,}/ /g;
    $inline =~ s/\s+$//;
    $inline =~ s/\&\s//g;
    # \222 and \221 are some form of funky right and
    # left quotes not present in ascii (of course)
    $inline =~ tr/\"\`\222\221/\'/;
    $inline =~ s/(Random\sNumber\sTable)/$1<\/a>/gi;
    $inline =~ s/(Action\sCharts?)/$1<\/a>/gi;
    # \227 is an em or en dash
    $inline =~ tr/\227/-/;
    $inline =~ s/^\s*(.*)\s*$/$1/;

    # --- Classification -----------------------------------------------
    if( $inline =~ /^\*/ ) {
        # Bulleted list: each "*" starts a new item.
        $inline =~ s/^\*\s*/ \n - /;
        $inline =~ s/\s*\*\s*/<\/li>\n\n- /g;
        $inline .= "\n\n\n";
    }
    elsif( $inline =~ /^\d+\)\s/ ) {
        # Numbered list: each "N) " starts a new item.
        $inline =~ s/^\d+\)\s+/ \n - /;
        $inline =~ s/\s*\d+\)\s+/<\/li>\n\n- /g;
        $inline .= "\n\n\n";
    }
    elsif( $inline =~ /^\<\!\-\-\spre\s\-\-\>/ ) {
        # Preformatted text: drop the marker and tell the operator.
        $inline =~ s/^\<\!\-\-\spre\s\-\-\>//;
        warn( "Warning: preformatted text in \"$infile\"\n" );
    }
    elsif( $inline =~ /^.+:\s+CLOSE\sCOMBAT\sSKILL/ ) {
        $inline =~ s/^(.+):\s+CLOSE\sCOMBAT\sSKILL\s+([0-9]+)\s+ENDURANCE\s+([0-9]+)/ $1<\/enemy>$2<\/enemy-attribute>$3<\/enemy-attribute><\/combat>/g;
    }
    elsif( $inline =~ /^.+:\s+COMBAT\sSKILL/ ) {
        $inline =~ s/^(.+):\s+COMBAT\sSKILL\s+([0-9]+)\s+ENDURANCE\s+([0-9]+)/ $1<\/enemy>$2<\/enemy-attribute>$3<\/enemy-attribute><\/combat>/;
    }
    elsif( $inline =~ /^(.*)\b(return|turn|go)([a-zA-Z\s]+?to )(\d{1,3})/i ) {
        # Choice: "... turn to 123 ..." style reference to a section.
        $inline =~ s/^(.*)\b(return|turn|go)([a-zA-Z\s]+?to )(\d{1,3})(.*)/ $1$2$3$4<\/link-text>$5<\/choice>/i;
        $inline =~ s/\s+<\/choice>/<\/choice>/;
    }
    elsif( $inline =~ /^\[/ ) {
        # Signpost: strip the surrounding square brackets.
        $inline =~ s/\[(.*)\]/$1/;
        $inline = " $inline";
        $inline =~ s/\s+<\/signpost>/<\/signpost>/;
    }
    elsif( $inline eq "" ) {
        # Nothing to convert.
    }
    elsif( $inline =~ /^<!--(.*)-->/ ) {
        # Any comment other than "<!-- pre -->" is passed through with a
        # warning. The pattern must really require a comment: a pattern
        # that matches everything would swallow every ordinary paragraph
        # and leave the default branch below unreachable.
        warn( "Warning: unknown comment \"$1\" in \"$infile\"\n" );
    }
    else {
        # Ordinary paragraph.
        $inline = " $inline\n";
        $inline =~ s/\s+<\/p>/<\/p>/;
    }

    # Interferes with selecting a combat paragraph if done earlier
    $inline =~ s/(COMBAT\sSKILL|CLOSE\sCOMBAT\sSKILL|ENDURANCE|WILLPOWER|\bCS\b|\bEP\b)([^<])/$1<\/typ>$2/g;
    return $inline;
}