X-Git-Url: http://git.projectaon.org/?p=project-aon.git;a=blobdiff_plain;f=common%2Fscripts%2Fxmlize.pl;fp=common%2Fscripts%2Fxmlize.pl;h=ed7511625bb306e8a7539bffe22c6690ff1c8c05;hp=277ceb090ad3a9be94d77d00063795f23495c969;hb=e8e22da9cc38ada2bedbc64c880c508ce7877343;hpb=06d757469d10301c723f3f2ace712196c115094a diff --git a/common/scripts/xmlize.pl b/common/scripts/xmlize.pl index 277ceb0..ed75116 100755 --- a/common/scripts/xmlize.pl +++ b/common/scripts/xmlize.pl @@ -6,6 +6,8 @@ use strict; use warnings; +use utf8; +use open ':encoding(UTF-8)'; my $FILE_EXTENSION = 'txt'; @@ -118,20 +120,21 @@ print << "(End of XML footer)"; sub xmlize { my( $inline, $infile ) = @_; - $inline =~ s/[[:space:]]*(\.\.\.|\.\s\.\s\.)[[:space:]]*//g; $inline =~ tr/\t/ /; - $inline =~ s/\s{2,}/ /g; - $inline =~ s/\s+$//; - $inline =~ s/\&\s//g; - $inline =~ tr/\"\`\222\221/\'/; + $inline =~ s/[[:space:]]{2,}/ /g; + $inline =~ s/[[:space:]]+$//; + $inline =~ s/^[[:space:]]+//; + $inline =~ s/[[:space:]]*(\.\.\.|\.\s\.\s\.)[[:space:]]*//g; + + $inline =~ s/\&(?=[[:space:]])//g; + $inline =~ tr/\"\`/\'/; + $inline =~ s/[\N{U+2018}\N{U+201C}]//g; + $inline =~ s/[\N{U+2019}\N{U+201D}]/<\/quote>/g; + $inline =~ s/[\N{U+2014}]//g; + $inline =~ s/[\N{U+2014}]//g; + $inline =~ s/(Random\sNumber\sTable)/$1<\/a>/gi; $inline =~ s/(Action\sCharts?)/$1<\/a>/gi; - # \222 and \221 are some form of funky right and - # left quotes not present in ascii (of course) - $inline =~ tr/\227/-/; - # \227 is an em or en dash - - $inline =~ s/^\s*(.*)\s*$/$1/; if( $inline =~ /^\*/ ) { $inline =~ s/^\*\s*/
    \n
  • /; @@ -162,11 +165,11 @@ sub xmlize { $inline = " $inline"; $inline =~ s/\s+<\/signpost>/<\/signpost>/; } - elsif( $inline eq "" ) { - } elsif( $inline =~ /^/ ) { warn( "Warning: unknown comment \"$1\" in \"$infile\"\n" ); } + elsif( $inline eq "" ) { + } else { $inline = "

    $inline

    "; $inline =~ s/\s+<\/p>/<\/p>/;