X-Git-Url: http://git.projectaon.org/?p=project-aon.git;a=blobdiff_plain;f=common%2Fscripts%2Fxmlize.pl;h=349d37dc6945e31bddf7ca200ac6d61a318e5e88;hp=ad69f766ff007b13b7c6379478aab77dfca1be29;hb=0cf6555c4e596a4b07eccb406d8dd3d2d8e116e0;hpb=f4bdee5083ca9a72713637e1e979aa183e06faea
diff --git a/common/scripts/xmlize.pl b/common/scripts/xmlize.pl
index ad69f76..349d37d 100755
--- a/common/scripts/xmlize.pl
+++ b/common/scripts/xmlize.pl
@@ -1,77 +1,16 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
#
# xmlize.pl
#
######################################################################
-#use strict;
+use strict;
+use warnings;
+use utf8;
+use open ':encoding(UTF-8)';
-$FILE_EXTENSION = 'txt';
-
-#### Subroutines
-
-sub xmlize {
- my( $inline, $infile ) = @_;
-
- $inline =~ s/[[:space:]]*(\.\.\.|\.\s\.\s\.)[[:space:]]*//g;
- $inline =~ tr/\t/ /;
- $inline =~ s/\s{2,}/ /g;
- $inline =~ s/\s+$//;
- $inline =~ s/\&\s//g;
- $inline =~ tr/\"\`\222\221/\'/;
- $inline =~ s/(Random\sNumber\sTable)/$1<\/a>/gi;
- $inline =~ s/(Action\sCharts?)/$1<\/a>/gi;
- # \222 and \221 are some form of funky right and
- # left quotes not present in ascii (of course)
- $inline =~ tr/\227/-/;
- # \227 is an em or en dash
-
- $inline =~ s/^\s*(.*)\s*$/$1/;
-
- if( $inline =~ /^\*/ ) {
- $inline =~ s/^\*\s*/ \n - /;
- $inline =~ s/\s*\*\s*/<\/li>\n
- /g;
- $inline .= "
\n
";
- }
- elsif( $inline =~ /^\d+\)\s/ ) {
- $inline =~ s/^\d+\)\s+/ \n - /;
- $inline =~ s/\s*\d+\)\s+/<\/li>\n
- /g;
- $inline .= "
\n
";
- }
- elsif( $inline =~ /^\<\!\-\-\spre\s\-\-\>/ ) {
- $inline =~ s/^\<\!\-\-\spre\s\-\-\>//;
- warn( "Warning: preformatted text in \"$infile\"\n" );
- }
- elsif( $inline =~ /^.+:\s+CLOSE\sCOMBAT\sSKILL/ ) {
- $inline =~ s/^(.+):\s+CLOSE\sCOMBAT\sSKILL\s+([0-9]+)\s+ENDURANCE\s+([0-9]+)/ $1<\/enemy>$2<\/enemy-attribute>$3<\/enemy-attribute><\/combat>/g;
- }
- elsif( $inline =~ /^.+:\s+COMBAT\sSKILL/ ) {
- $inline =~ s/^(.+):\s+COMBAT\sSKILL\s+([0-9]+)\s+ENDURANCE\s+([0-9]+)/ $1<\/enemy>$2<\/enemy-attribute>$3<\/enemy-attribute><\/combat>/;
- }
- elsif( $inline =~ /^(.*)\b(return|turn|go)([a-zA-Z\s]+?to )(\d{1,3})/i ) {
- $inline =~ s/^(.*)\b(return|turn|go)([a-zA-Z\s]+?to )(\d{1,3})(.*)/ $1$2$3$4<\/link-text>$5<\/choice>/i;
- $inline =~ s/\s+<\/choice>/<\/choice>/;
- }
- elsif( $inline =~ /^\[/ ) {
- $inline =~ s/\[(.*)\]/$1/;
- $inline = " $inline";
- $inline =~ s/\s+<\/signpost>/<\/signpost>/;
- }
- elsif( $inline eq "" ) {
- }
- elsif( $inline =~ /^/ ) {
- warn( "Warning: unknown comment \"$1\" in \"$infile\"\n" );
- }
- else {
- $inline = " $inline
";
- $inline =~ s/\s+<\/p>/<\/p>/;
- }
-
-# Interferes with selecting a combat paragraph if done earlier
- $inline =~ s/(COMBAT\sSKILL|CLOSE\sCOMBAT\sSKILL|ENDURANCE|WILLPOWER|\bCS\b|\bEP\b)([^<])/$1<\/typ>$2/g;
-
- return $inline;
-}
+my $FILE_EXTENSION = 'txt'; # presumably the extension of the input files; its use is outside this diff - TODO confirm
+my $BASE_INDENT = ' '; # indentation prefix interpolated into the emitted section wrapper and into the markup built by xmlize
#### Main Routine
@@ -92,7 +31,7 @@ print << "(End of XML Header)";
%general.inclusions;
]>
-
+
[Insert Title]
@@ -147,22 +86,22 @@ for( my $sectionNumber = $minSectionNumber; $sectionNumber <= $numberOfSections;
foreach my $oldline (@oldlines) {
$oldline =~ s/\r|\n/ /g;
- $oldline =~ s/^\s*(\S*)\s*$/$1/;
- $oldline =~ s/\s\s/ /;
- if( $oldline ne "" ) {
- $newline .= (" " . $oldline);
- }
- else {
- $newline = &xmlize( $newline, $infile );
- $newline .= "\n" if( $newline ne "" );
- push( @newlines, $newline );
- $newline = "";
- }
+ $oldline =~ s/^\s*(\S*)\s*$/$1/; # only matches blank or single-token lines; strips the whitespace around them
+ $oldline =~ s/\s{2,}/ /; # NOTE(review): no /g, so only the first run of whitespace is collapsed here
+ if( $oldline ne "" ) { # non-blank line: append it to the paragraph being accumulated
+ $newline .= (" " . $oldline);
+ }
+ else { # blank line: the accumulated paragraph is complete, so convert it and queue it for output
+ $newline = &xmlize($newline, $infile);
+ $newline .= "\n" if($newline ne ""); # only non-empty results get a terminating newline
+ push( @newlines, $newline );
+ $newline = "";
+ }
}
- print "\n\n \n $sectionNumber\n\n \n";
+ print "\n\n$BASE_INDENT\n$BASE_INDENT $sectionNumber\n\n$BASE_INDENT \n";
print @newlines;
- print " \n ";
+ print "$BASE_INDENT \n$BASE_INDENT";
}
print << "(End of XML footer)";
@@ -176,3 +115,77 @@ print << "(End of XML footer)";
(End of XML footer)
+
+#### Subroutines
+# xmlize( $inline, $infile ): clean up one paragraph of text, wrap it in markup chosen from how the paragraph starts, and return the result.
+sub xmlize {
+ my( $inline, $infile ) = @_;
+ # $infile only appears in warning messages. Step 1: tabs to spaces, collapse and trim whitespace, remove ellipses.
+ $inline =~ tr/\t/ /;
+ $inline =~ s/[[:space:]]{2,}/ /g;
+ $inline =~ s/[[:space:]]+$//;
+ $inline =~ s/^[[:space:]]+//;
+ $inline =~ s/[[:space:]]*(\.\.\.|\.\s\.\s\.)[[:space:]]*//g;
+ # Step 2: remove ampersands that precede whitespace, turn double quotes and backticks into apostrophes, then handle curly quotes and em dashes.
+ $inline =~ s/\&(?=[[:space:]])//g;
+ $inline =~ tr/\"\`/\'/;
+ $inline =~ s/[\N{U+2018}\N{U+201C}]//g; # NOTE(review): opening curly quotes are deleted while closing ones get a closing quote tag - the opening tag text looks lost; confirm
+ $inline =~ s/[\N{U+2019}\N{U+201D}]/<\/quote>/g;
+ $inline =~ s/[\N{U+2014}]//g;
+ $inline =~ s/[\N{U+2014}]//g; # NOTE(review): identical to the previous line, so it can never match - possibly meant U+2013 (en dash); confirm
+ # Step 3: append a closing anchor tag after each mention of the Random Number Table and the Action Chart(s), case-insensitively.
+ $inline =~ s/(Random\sNumber\sTable)/$1<\/a>/gi;
+ $inline =~ s/(Action\sCharts?)/$1<\/a>/gi;
+ # Step 4: classify the paragraph by its leading pattern; the first matching branch wins.
+ if( $inline =~ /^\*/ ) {
+ # unordered lists: the paragraph starts with an asterisk; every further asterisk closes one list item and starts a new line
+ $inline =~ s/^\*\s*/$BASE_INDENT \n$BASE_INDENT - /;
+ $inline =~ s/\s*\*\s*/<\/li>\n$BASE_INDENT
- /g;
+ $inline .= "
\n$BASE_INDENT
";
+ }
+ elsif( $inline =~ /^\d+\)\s/ ) {
+ # ordered lists: the paragraph starts with digits, a closing parenthesis and whitespace; later markers of that form are replaced the same way
+ $inline =~ s/^\d+\)\s+/$BASE_INDENT \n$BASE_INDENT - /;
+ $inline =~ s/\s*\d+\)\s+/<\/li>\n$BASE_INDENT
- /g;
+ $inline .= "
\n$BASE_INDENT
";
+ }
+ elsif( $inline =~ /^\<\!\-\-\spre\s\-\-\>/ ) {
+ # pre-formatted text: strip the leading <!-- pre --> marker and warn, naming the input file
+ $inline =~ s/^\<\!\-\-\spre\s\-\-\>//;
+ warn( "Warning: pre-formatted text in \"$infile\"\n" );
+ }
+ elsif( $inline =~ /^.+:\s+CLOSE\sCOMBAT\sSKILL/ ) {
+ # Freeway Warrior combat: enemy name, colon, CLOSE COMBAT SKILL number, ENDURANCE number
+ $inline =~ s/^(.+):\s+CLOSE\sCOMBAT\sSKILL\s+([0-9]+)\s+ENDURANCE\s+([0-9]+)/$BASE_INDENT $1<\/enemy>$2<\/enemy-attribute>$3<\/enemy-attribute><\/combat>/g;
+ }
+ elsif( $inline =~ /^.+:\s+COMBAT\sSKILL/ ) {
+ # combat: enemy name, colon, COMBAT SKILL number, ENDURANCE number
+ $inline =~ s/^(.+):\s+COMBAT\sSKILL\s+([0-9]+)\s+ENDURANCE\s+([0-9]+)/$BASE_INDENT $1<\/enemy>$2<\/enemy-attribute>$3<\/enemy-attribute><\/combat>/;
+ }
+ elsif( $inline =~ /^(.*)\b(return|turn|go)([a-zA-Z\s]+?to )(\d{1,3})/i ) {
+ # links: a return/turn/go ... to phrase followed by a 1-3 digit number (case-insensitive); whitespace before the closing choice tag is removed
+ $inline =~ s/^(.*)\b(return|turn|go)([a-zA-Z\s]+?to )(\d{1,3})(.*)/$BASE_INDENT $1$2$3$4<\/link-text>$5<\/choice>/i;
+ $inline =~ s/\s+<\/choice>/<\/choice>/;
+ }
+ elsif( $inline =~ /^\[/ ) {
+ # signposts: the paragraph starts with an opening square bracket; the text between the first [ and the last ] is unwrapped
+ $inline =~ s/\[(.*)\]/$1/;
+ $inline = "$BASE_INDENT $inline";
+ $inline =~ s/\s+<\/signpost>/<\/signpost>/;
+ }
+ elsif( $inline =~ /^/ ) {
+ # comments. NOTE(review): as written /^/ matches every string, so the two branches below are unreachable and the pattern has no capture group for $1 - it looks truncated; confirm
+ warn( "Warning: unknown comment \"$1\" in \"$infile\"\n" );
+ }
+ elsif( $inline eq "" ) {
+ # empty paragraph: leave it empty
+ }
+ else { # anything else: prefix the text with $BASE_INDENT and append a line break
+ $inline = "$BASE_INDENT $inline
";
+ }
+
+ # Mark up attribute names last: doing this earlier interferes with selecting a combat paragraph
+ $inline =~ s/(COMBAT\sSKILL|CLOSE\sCOMBAT\sSKILL|ENDURANCE|WILLPOWER|\bCS\b|\bEP\b)([^<])/$1<\/typ>$2/g;
+ # (a keyword at the very end of the string, or directly before <, is left alone because [^<] must consume one character)
+ return $inline;
+}