X-Git-Url: http://git.projectaon.org/?a=blobdiff_plain;f=common%2Fscripts%2Fxmlize.pl;h=d78bd94ce7b3e93a59abb9b11b4237373596d854;hb=f90cf1eea570aacdee9c831c498ad35aef0768c4;hp=349d37dc6945e31bddf7ca200ac6d61a318e5e88;hpb=0cf6555c4e596a4b07eccb406d8dd3d2d8e116e0;p=project-aon.git diff --git a/common/scripts/xmlize.pl b/common/scripts/xmlize.pl index 349d37d..d78bd94 100755 --- a/common/scripts/xmlize.pl +++ b/common/scripts/xmlize.pl @@ -8,6 +8,7 @@ use strict; use warnings; use utf8; use open ':encoding(UTF-8)'; +use open ':std', ':encoding(UTF-8)'; my $FILE_EXTENSION = 'txt'; my $BASE_INDENT = ' '; @@ -121,6 +122,10 @@ print << "(End of XML footer)"; sub xmlize { my( $inline, $infile ) = @_; + if(!defined $inline || $inline eq "") { + return ""; + } + $inline =~ tr/\t/ /; $inline =~ s/[[:space:]]{2,}/ /g; $inline =~ s/[[:space:]]+$//; @@ -130,6 +135,7 @@ sub xmlize { $inline =~ s/\&(?=[[:space:]])//g; $inline =~ tr/\"\`/\'/; $inline =~ s/[\N{U+2018}\N{U+201C}]//g; + $inline =~ s|[\N{U+2019}\N{U+201D}]([[:alpha:]])|$1|g; $inline =~ s/[\N{U+2019}\N{U+201D}]/<\/quote>/g; $inline =~ s/[\N{U+2014}]//g; $inline =~ s/[\N{U+2014}]//g;