From 2c4fa77295c37693c357952bf5e11cce1f553290 Mon Sep 17 00:00:00 2001 From: Jonathan Blake Date: Fri, 24 Mar 2006 00:28:30 +0000 Subject: [PATCH] Updated for use with Spanish gamebooks, added a --language command line option, and added a --skip-ASCII-check option to avoid flagging all of those accented characters. git-svn-id: https://projectaon.org/data/trunk@142 f6f3e2d7-ff33-0410-aaf5-b4bee2cdac11 --- scripts/gblint.pl | 42 ++++++++++++++++++++++++++++++------------ 1 file changed, 30 insertions(+), 12 deletions(-) diff --git a/scripts/gblint.pl b/scripts/gblint.pl index faaa8aa..2a98dc2 100755 --- a/scripts/gblint.pl +++ b/scripts/gblint.pl @@ -139,6 +139,8 @@ my $maxErrorCount = 0; my $skipLines = 0; my $initials = "??"; my $useCorr = 0; +my $checkNonASCII = 1; +my $language = 'en'; while( $#ARGV > -1 && $ARGV[ 0 ] =~ /^-/ ) { if( $ARGV[ 0 ] eq "-e" && $#ARGV > 0 ) { @@ -157,6 +159,14 @@ while( $#ARGV > -1 && $ARGV[ 0 ] =~ /^-/ ) { shift @ARGV; $useCorr = 1; } + elsif( $ARGV[ 0 ] eq '--skip-ASCII-check' ) { + shift @ARGV; + $checkNonASCII = 0; + } + elsif( $ARGV[ 0 ] eq '--language' ) { + shift @ARGV; + $language = shift @ARGV; + } } my $lineNumber = 1; @@ -187,8 +197,11 @@ while( my $line = <> ) { if( $line =~ /\224/ ) { &printError( "ne", $currentSection, $lineNumber, "unescaped right double quotation mark(s)", "\224", "..." ); } if( $line =~ /\226/ ) { &printError( "ne", $currentSection, $lineNumber, "unescaped endash(es)", "\226", "&endash;" ); } if( $line =~ /\227/ ) { &printError( "ne", $currentSection, $lineNumber, "unescaped emdash(es)", "\227", "&emdash;" ); } - if( $line =~ /([ \200-\220 \225 \230-\377 ])/gx ) { - &printError( "ne", $currentSection, $lineNumber, "unescaped non-ASCII character(s); first found only", "$1" ); +# if( $checkNonASCII && $line =~ /(.{0,4}?)?([\x80-\xff])(.{0,4})?/ ) { +# &printError( "ne", $currentSection, $lineNumber, "unescaped non-ASCII character(s) in \"${1}[[HERE]]${3}\"; first found only", "$2" ); +# } + if( $line =~ /(.{0,4}?)?([\x80-\x9f])(.{0,4})?/ ) { + &printError( "ne", $currentSection, $lineNumber, "unsafe non-ASCII character(s) in \"${1}[[HERE]]${3}\"; first found only", "$2" ); } } if( $line =~ /'/ ) { &printError( "ne", $currentSection, $lineNumber, "unescaped apostrophe(s)", "'", "\' or ..." ); } @@ -246,7 +259,7 @@ while( my $line = <> ) { if( $line =~ /(\&link.[^;]+;)/ ) { &printError( "ne", $currentSection, $lineNumber, "probable obsolete markup", "$1", "use instead" ); } if( $line =~ /\&([^[:space:]]+);/ ) { unless( $1 =~ /^(?:link|inclusion)/ ) { - &printError( "ne", $currentSection, $lineNumber, "probable obsolete markup", "\&$1\;", "" ); + &printError( "ne", $currentSection, $lineNumber, "possible obsolete markup", "\&$1\;", "" ); } } if( $line =~ /(]*) class="footnote"(.*?)>)/ ) { &printError( "ne", $currentSection, $lineNumber, "obsolete markup", "$1", "" ); } @@ -273,15 +286,20 @@ while( my $line = <> ) { ##### Others if( $line =~ m{