X-Git-Url: http://git.projectaon.org/?p=project-aon.git;a=blobdiff_plain;f=common%2Fscripts%2Fxmlize.pl;h=349d37dc6945e31bddf7ca200ac6d61a318e5e88;hp=ad69f766ff007b13b7c6379478aab77dfca1be29;hb=0cf6555c4e596a4b07eccb406d8dd3d2d8e116e0;hpb=f4bdee5083ca9a72713637e1e979aa183e06faea diff --git a/common/scripts/xmlize.pl b/common/scripts/xmlize.pl index ad69f76..349d37d 100755 --- a/common/scripts/xmlize.pl +++ b/common/scripts/xmlize.pl @@ -1,77 +1,16 @@ -#!/usr/bin/perl -w +#!/usr/bin/env perl # # xmlize.pl # ###################################################################### -#use strict; +use strict; +use warnings; +use utf8; +use open ':encoding(UTF-8)'; -$FILE_EXTENSION = 'txt'; - -#### Subroutines - -sub xmlize { - my( $inline, $infile ) = @_; - - $inline =~ s/[[:space:]]*(\.\.\.|\.\s\.\s\.)[[:space:]]*//g; - $inline =~ tr/\t/ /; - $inline =~ s/\s{2,}/ /g; - $inline =~ s/\s+$//; - $inline =~ s/\&\s//g; - $inline =~ tr/\"\`\222\221/\'/; - $inline =~ s/(Random\sNumber\sTable)/$1<\/a>/gi; - $inline =~ s/(Action\sCharts?)/$1<\/a>/gi; - # \222 and \221 are some form of funky right and - # left quotes not present in ascii (of course) - $inline =~ tr/\227/-/; - # \227 is an em or en dash - - $inline =~ s/^\s*(.*)\s*$/$1/; - - if( $inline =~ /^\*/ ) { - $inline =~ s/^\*\s*/
    \n
  • /; - $inline =~ s/\s*\*\s*/<\/li>\n
  • /g; - $inline .= "
  • \n
"; - } - elsif( $inline =~ /^\d+\)\s/ ) { - $inline =~ s/^\d+\)\s+/
    \n
  1. /; - $inline =~ s/\s*\d+\)\s+/<\/li>\n
  2. /g; - $inline .= "
  3. \n
"; - } - elsif( $inline =~ /^\<\!\-\-\spre\s\-\-\>/ ) { - $inline =~ s/^\<\!\-\-\spre\s\-\-\>//; - warn( "Warning: preformatted text in \"$infile\"\n" ); - } - elsif( $inline =~ /^.+:\s+CLOSE\sCOMBAT\sSKILL/ ) { - $inline =~ s/^(.+):\s+CLOSE\sCOMBAT\sSKILL\s+([0-9]+)\s+ENDURANCE\s+([0-9]+)/ $1<\/enemy>$2<\/enemy-attribute>$3<\/enemy-attribute><\/combat>/g; - } - elsif( $inline =~ /^.+:\s+COMBAT\sSKILL/ ) { - $inline =~ s/^(.+):\s+COMBAT\sSKILL\s+([0-9]+)\s+ENDURANCE\s+([0-9]+)/ $1<\/enemy>$2<\/enemy-attribute>$3<\/enemy-attribute><\/combat>/; - } - elsif( $inline =~ /^(.*)\b(return|turn|go)([a-zA-Z\s]+?to )(\d{1,3})/i ) { - $inline =~ s/^(.*)\b(return|turn|go)([a-zA-Z\s]+?to )(\d{1,3})(.*)/ $1$2$3$4<\/link-text>$5<\/choice>/i; - $inline =~ s/\s+<\/choice>/<\/choice>/; - } - elsif( $inline =~ /^\[/ ) { - $inline =~ s/\[(.*)\]/$1/; - $inline = " $inline"; - $inline =~ s/\s+<\/signpost>/<\/signpost>/; - } - elsif( $inline eq "" ) { - } - elsif( $inline =~ /^/ ) { - warn( "Warning: unknown comment \"$1\" in \"$infile\"\n" ); - } - else { - $inline = "

$inline

"; - $inline =~ s/\s+<\/p>/<\/p>/; - } - -# Interferes with selecting a combat paragraph if done earlier - $inline =~ s/(COMBAT\sSKILL|CLOSE\sCOMBAT\sSKILL|ENDURANCE|WILLPOWER|\bCS\b|\bEP\b)([^<])/$1<\/typ>$2/g; - - return $inline; -} +my $FILE_EXTENSION = 'txt'; +my $BASE_INDENT = ' '; #### Main Routine @@ -92,7 +31,7 @@ print << "(End of XML Header)"; %general.inclusions; ]> - + [Insert Title] @@ -147,22 +86,22 @@ for( my $sectionNumber = $minSectionNumber; $sectionNumber <= $numberOfSections; foreach my $oldline (@oldlines) { $oldline =~ s/\r|\n/ /g; - $oldline =~ s/^\s*(\S*)\s*$/$1/; - $oldline =~ s/\s\s/ /; - if( $oldline ne "" ) { - $newline .= (" " . $oldline); - } - else { - $newline = &xmlize( $newline, $infile ); - $newline .= "\n" if( $newline ne "" ); - push( @newlines, $newline ); - $newline = ""; - } + $oldline =~ s/^\s*(\S*)\s*$/$1/; + $oldline =~ s/\s{2,}/ /; + if( $oldline ne "" ) { + $newline .= (" " . $oldline); + } + else { + $newline = &xmlize($newline, $infile); + $newline .= "\n" if($newline ne ""); + push( @newlines, $newline ); + $newline = ""; + } } - print "\n\n
\n $sectionNumber\n\n \n"; + print "\n\n$BASE_INDENT
\n$BASE_INDENT $sectionNumber\n\n$BASE_INDENT \n"; print @newlines; - print " \n
"; + print "$BASE_INDENT
\n$BASE_INDENT
"; } print << "(End of XML footer)"; @@ -176,3 +115,77 @@ print << "(End of XML footer)";
(End of XML footer) + +#### Subroutines + +sub xmlize { + my( $inline, $infile ) = @_; + + $inline =~ tr/\t/ /; + $inline =~ s/[[:space:]]{2,}/ /g; + $inline =~ s/[[:space:]]+$//; + $inline =~ s/^[[:space:]]+//; + $inline =~ s/[[:space:]]*(\.\.\.|\.\s\.\s\.)[[:space:]]*//g; + + $inline =~ s/\&(?=[[:space:]])//g; + $inline =~ tr/\"\`/\'/; + $inline =~ s/[\N{U+2018}\N{U+201C}]//g; + $inline =~ s/[\N{U+2019}\N{U+201D}]/<\/quote>/g; + $inline =~ s/[\N{U+2014}]//g; + $inline =~ s/[\N{U+2014}]//g; + + $inline =~ s/(Random\sNumber\sTable)/
$1<\/a>/gi; + $inline =~ s/(Action\sCharts?)/$1<\/a>/gi; + + if( $inline =~ /^\*/ ) { + # unordered lists + $inline =~ s/^\*\s*/$BASE_INDENT
    \n$BASE_INDENT
  • /; + $inline =~ s/\s*\*\s*/<\/li>\n$BASE_INDENT
  • /g; + $inline .= "
  • \n$BASE_INDENT
"; + } + elsif( $inline =~ /^\d+\)\s/ ) { + # ordered lists + $inline =~ s/^\d+\)\s+/$BASE_INDENT
    \n$BASE_INDENT
  1. /; + $inline =~ s/\s*\d+\)\s+/<\/li>\n$BASE_INDENT
  2. /g; + $inline .= "
  3. \n$BASE_INDENT
"; + } + elsif( $inline =~ /^\<\!\-\-\spre\s\-\-\>/ ) { + # pre-formatted text + $inline =~ s/^\<\!\-\-\spre\s\-\-\>//; + warn( "Warning: pre-formatted text in \"$infile\"\n" ); + } + elsif( $inline =~ /^.+:\s+CLOSE\sCOMBAT\sSKILL/ ) { + # Freeway Warrior combat + $inline =~ s/^(.+):\s+CLOSE\sCOMBAT\sSKILL\s+([0-9]+)\s+ENDURANCE\s+([0-9]+)/$BASE_INDENT $1<\/enemy>$2<\/enemy-attribute>$3<\/enemy-attribute><\/combat>/g; + } + elsif( $inline =~ /^.+:\s+COMBAT\sSKILL/ ) { + # combat + $inline =~ s/^(.+):\s+COMBAT\sSKILL\s+([0-9]+)\s+ENDURANCE\s+([0-9]+)/$BASE_INDENT $1<\/enemy>$2<\/enemy-attribute>$3<\/enemy-attribute><\/combat>/; + } + elsif( $inline =~ /^(.*)\b(return|turn|go)([a-zA-Z\s]+?to )(\d{1,3})/i ) { + # links + $inline =~ s/^(.*)\b(return|turn|go)([a-zA-Z\s]+?to )(\d{1,3})(.*)/$BASE_INDENT $1$2$3$4<\/link-text>$5<\/choice>/i; + $inline =~ s/\s+<\/choice>/<\/choice>/; + } + elsif( $inline =~ /^\[/ ) { + # signposts + $inline =~ s/\[(.*)\]/$1/; + $inline = "$BASE_INDENT $inline"; + $inline =~ s/\s+<\/signpost>/<\/signpost>/; + } + elsif( $inline =~ /^/ ) { + # comments + warn( "Warning: unknown comment \"$1\" in \"$infile\"\n" ); + } + elsif( $inline eq "" ) { + # do nothing + } + else { + $inline = "$BASE_INDENT

$inline

"; + } + + # Interferes with selecting a combat paragraph if done earlier + $inline =~ s/(COMBAT\sSKILL|CLOSE\sCOMBAT\sSKILL|ENDURANCE|WILLPOWER|\bCS\b|\bEP\b)([^<])/$1<\/typ>$2/g; + + return $inline; +}