X-Git-Url: http://git.projectaon.org/?p=project-aon.git;a=blobdiff_plain;f=common%2Fscripts%2Fxmlize.pl;fp=common%2Fscripts%2Fxmlize.pl;h=ed7511625bb306e8a7539bffe22c6690ff1c8c05;hp=277ceb090ad3a9be94d77d00063795f23495c969;hb=e8e22da9cc38ada2bedbc64c880c508ce7877343;hpb=06d757469d10301c723f3f2ace712196c115094a
diff --git a/common/scripts/xmlize.pl b/common/scripts/xmlize.pl
index 277ceb0..ed75116 100755
--- a/common/scripts/xmlize.pl
+++ b/common/scripts/xmlize.pl
@@ -6,6 +6,8 @@
use strict;
use warnings;
+use utf8;
+use open ':encoding(UTF-8)';
my $FILE_EXTENSION = 'txt';
@@ -118,20 +120,21 @@ print << "(End of XML footer)";
sub xmlize {
my( $inline, $infile ) = @_;
- $inline =~ s/[[:space:]]*(\.\.\.|\.\s\.\s\.)[[:space:]]*//g;
$inline =~ tr/\t/ /;
- $inline =~ s/\s{2,}/ /g;
- $inline =~ s/\s+$//;
- $inline =~ s/\&\s//g;
- $inline =~ tr/\"\`\222\221/\'/;
+ $inline =~ s/[[:space:]]{2,}/ /g;
+ $inline =~ s/[[:space:]]+$//;
+ $inline =~ s/^[[:space:]]+//;
+ $inline =~ s/[[:space:]]*(\.\.\.|\.\s\.\s\.)[[:space:]]*//g;
+
+ $inline =~ s/\&(?=[[:space:]])//g;
+ $inline =~ tr/\"\`/\'/;
+ $inline =~ s/[\N{U+2018}\N{U+201C}]//g;
+ $inline =~ s/[\N{U+2019}\N{U+201D}]/<\/quote>/g;
+ $inline =~ s/[\N{U+2014}]//g;
+ $inline =~ s/[\N{U+2014}]//g;
+
$inline =~ s/(Random\sNumber\sTable)/$1<\/a>/gi;
$inline =~ s/(Action\sCharts?)/$1<\/a>/gi;
- # \222 and \221 are some form of funky right and
- # left quotes not present in ascii (of course)
- $inline =~ tr/\227/-/;
- # \227 is an em or en dash
-
- $inline =~ s/^\s*(.*)\s*$/$1/;
if( $inline =~ /^\*/ ) {
$inline =~ s/^\*\s*/ \n - /;
@@ -162,11 +165,11 @@ sub xmlize {
$inline = " $inline";
$inline =~ s/\s+<\/signpost>/<\/signpost>/;
}
- elsif( $inline eq "" ) {
- }
elsif( $inline =~ /^/ ) {
warn( "Warning: unknown comment \"$1\" in \"$infile\"\n" );
}
+ elsif( $inline eq "" ) {
+ }
else {
$inline = "
$inline
";
$inline =~ s/\s+<\/p>/<\/p>/;