X-Git-Url: http://git.projectaon.org/?p=project-aon.git;a=blobdiff_plain;f=common%2Fscripts%2Fxmlize.pl;h=063bd05626d98e414774b7bcaccb7f15c1a04f88;hp=277ceb090ad3a9be94d77d00063795f23495c969;hb=93b6ee167006f78af6e039103055d935ba1822a4;hpb=06d757469d10301c723f3f2ace712196c115094a diff --git a/common/scripts/xmlize.pl b/common/scripts/xmlize.pl index 277ceb0..063bd05 100755 --- a/common/scripts/xmlize.pl +++ b/common/scripts/xmlize.pl @@ -6,8 +6,12 @@ use strict; use warnings; +use utf8; +use open ':encoding(UTF-8)'; +use open ':std', ':encoding(UTF-8)'; my $FILE_EXTENSION = 'txt'; +my $BASE_INDENT = ' '; #### Main Routine @@ -28,7 +32,7 @@ print << "(End of XML Header)"; %general.inclusions; ]> - + [Insert Title] @@ -96,9 +100,9 @@ for( my $sectionNumber = $minSectionNumber; $sectionNumber <= $numberOfSections; } } - print "\n\n
\n $sectionNumber\n\n \n"; + print "\n\n$BASE_INDENT
\n$BASE_INDENT $sectionNumber\n\n$BASE_INDENT \n"; print @newlines; - print " \n
"; + print "$BASE_INDENT
\n$BASE_INDENT
"; } print << "(End of XML footer)"; @@ -118,61 +122,74 @@ print << "(End of XML footer)"; sub xmlize { my( $inline, $infile ) = @_; - $inline =~ s/[[:space:]]*(\.\.\.|\.\s\.\s\.)[[:space:]]*//g; + if(!defined $inline || $inline eq "") { + return ""; + } + $inline =~ tr/\t/ /; - $inline =~ s/\s{2,}/ /g; - $inline =~ s/\s+$//; - $inline =~ s/\&\s//g; - $inline =~ tr/\"\`\222\221/\'/; + $inline =~ s/[[:space:]]{2,}/ /g; + $inline =~ s/[[:space:]]+$//; + $inline =~ s/^[[:space:]]+//; + $inline =~ s/[[:space:]]*(\.\.\.|\.\s\.\s\.)[[:space:]]*//g; + + $inline =~ s/\&(?=[[:space:]])//g; + $inline =~ tr/\"\`/\'/; + $inline =~ s/[\N{U+2018}\N{U+201C}]//g; + $inline =~ s/[\N{U+2019}\N{U+201D}]/<\/quote>/g; + $inline =~ s/[\N{U+2014}]//g; + $inline =~ s/[\N{U+2014}]//g; + $inline =~ s/(Random\sNumber\sTable)/$1<\/a>/gi; $inline =~ s/(Action\sCharts?)/$1<\/a>/gi; - # \222 and \221 are some form of funky right and - # left quotes not present in ascii (of course) - $inline =~ tr/\227/-/; - # \227 is an em or en dash - - $inline =~ s/^\s*(.*)\s*$/$1/; if( $inline =~ /^\*/ ) { - $inline =~ s/^\*\s*/
    \n
  • /; - $inline =~ s/\s*\*\s*/<\/li>\n
  • /g; - $inline .= "
  • \n
"; + # unordered lists + $inline =~ s/^\*\s*/$BASE_INDENT
    \n$BASE_INDENT
  • /; + $inline =~ s/\s*\*\s*/<\/li>\n$BASE_INDENT
  • /g; + $inline .= "
  • \n$BASE_INDENT
"; } elsif( $inline =~ /^\d+\)\s/ ) { - $inline =~ s/^\d+\)\s+/
    \n
  1. /; - $inline =~ s/\s*\d+\)\s+/<\/li>\n
  2. /g; - $inline .= "
  3. \n
"; + # ordered lists + $inline =~ s/^\d+\)\s+/$BASE_INDENT
    \n$BASE_INDENT
  1. /; + $inline =~ s/\s*\d+\)\s+/<\/li>\n$BASE_INDENT
  2. /g; + $inline .= "
  3. \n$BASE_INDENT
"; } elsif( $inline =~ /^\<\!\-\-\spre\s\-\-\>/ ) { + # pre-formatted text $inline =~ s/^\<\!\-\-\spre\s\-\-\>//; - warn( "Warning: preformatted text in \"$infile\"\n" ); + warn( "Warning: pre-formatted text in \"$infile\"\n" ); } elsif( $inline =~ /^.+:\s+CLOSE\sCOMBAT\sSKILL/ ) { - $inline =~ s/^(.+):\s+CLOSE\sCOMBAT\sSKILL\s+([0-9]+)\s+ENDURANCE\s+([0-9]+)/ $1<\/enemy>$2<\/enemy-attribute>$3<\/enemy-attribute><\/combat>/g; + # Freeway Warrior combat + $inline =~ s/^(.+):\s+CLOSE\sCOMBAT\sSKILL\s+([0-9]+)\s+ENDURANCE\s+([0-9]+)/$BASE_INDENT $1<\/enemy>$2<\/enemy-attribute>$3<\/enemy-attribute><\/combat>/g; } elsif( $inline =~ /^.+:\s+COMBAT\sSKILL/ ) { - $inline =~ s/^(.+):\s+COMBAT\sSKILL\s+([0-9]+)\s+ENDURANCE\s+([0-9]+)/ $1<\/enemy>$2<\/enemy-attribute>$3<\/enemy-attribute><\/combat>/; + # combat + $inline =~ s/^(.+):\s+COMBAT\sSKILL\s+([0-9]+)\s+ENDURANCE\s+([0-9]+)/$BASE_INDENT $1<\/enemy>$2<\/enemy-attribute>$3<\/enemy-attribute><\/combat>/; } elsif( $inline =~ /^(.*)\b(return|turn|go)([a-zA-Z\s]+?to )(\d{1,3})/i ) { - $inline =~ s/^(.*)\b(return|turn|go)([a-zA-Z\s]+?to )(\d{1,3})(.*)/ $1$2$3$4<\/link-text>$5<\/choice>/i; + # links + $inline =~ s/^(.*)\b(return|turn|go)([a-zA-Z\s]+?to )(\d{1,3})(.*)/$BASE_INDENT $1$2$3$4<\/link-text>$5<\/choice>/i; $inline =~ s/\s+<\/choice>/<\/choice>/; } elsif( $inline =~ /^\[/ ) { + # signposts $inline =~ s/\[(.*)\]/$1/; - $inline = " $inline"; + $inline = "$BASE_INDENT $inline"; $inline =~ s/\s+<\/signpost>/<\/signpost>/; } - elsif( $inline eq "" ) { - } elsif( $inline =~ /^/ ) { + # comments warn( "Warning: unknown comment \"$1\" in \"$infile\"\n" ); } + elsif( $inline eq "" ) { + # do nothing + } else { - $inline = "

$inline

"; - $inline =~ s/\s+<\/p>/<\/p>/; + $inline = "$BASE_INDENT

$inline

"; } -# Interferes with selecting a combat paragraph if done earlier + # Interferes with selecting a combat paragraph if done earlier $inline =~ s/(COMBAT\sSKILL|CLOSE\sCOMBAT\sSKILL|ENDURANCE|WILLPOWER|\bCS\b|\bEP\b)([^<])/$1<\/typ>$2/g; return $inline;