3 # Each new section id requires adding it to the list (e.g. improved
6 ###############################################################################
11 my %sectionDocLookup = (
12 '_unknown' => '_unknown',
15 'dedicate' => 'dedicate',
16 'acknwldg' => 'acknwldg',
17 'credits' => 'acknwldg',
20 'gamerulz' => 'gamerulz',
21 'discplnz' => 'discplnz',
22 'camflage' => 'discplnz',
23 'hunting' => 'discplnz',
24 'sixthsns' => 'discplnz',
25 'tracking' => 'discplnz',
26 'healing' => 'discplnz',
27 'wepnskll' => 'discplnz',
28 'mndshld' => 'discplnz',
29 'mndblst' => 'discplnz',
30 'anmlknsp' => 'discplnz',
31 'mindomtr' => 'discplnz',
32 'mksumary' => 'discplnz',
33 'anmlctrl' => 'discplnz',
34 'curing' => 'discplnz',
35 'invsblty' => 'discplnz',
36 'psisurge' => 'discplnz',
37 'psiscrn' => 'discplnz',
38 'dvnation' => 'discplnz',
39 'wpnmstry' => 'discplnz',
40 'anmlmstr' => 'discplnz',
41 'deliver' => 'discplnz',
42 'assimila' => 'discplnz',
43 'hntmstry' => 'discplnz',
44 'pthmnshp' => 'discplnz',
45 'kaisurge' => 'discplnz',
46 'kaiscrn' => 'discplnz',
47 'nexus' => 'discplnz',
48 'gnosis' => 'discplnz',
50 'kalchemy' => 'discplnz',
52 'lessmcks' => 'powers',
53 'alchemy' => 'powers',
54 'sorcery' => 'powers',
55 'enchant' => 'powers',
56 'elementl' => 'powers',
57 'prophecy' => 'powers',
58 'psycmncy' => 'powers',
59 'evcation' => 'powers',
60 'highmcks' => 'powers',
61 'thamtrgy' => 'powers',
62 'telergy' => 'powers',
63 'physirgy' => 'powers',
64 'theurgy' => 'powers',
65 'visionry' => 'powers',
66 'necrmncy' => 'powers',
68 'moonston' => 'powers',
69 'equipmnt' => 'equipmnt',
70 'howcarry' => 'equipmnt',
71 'howmuch' => 'equipmnt',
72 'howuse' => 'equipmnt',
73 'cmbtrulz' => 'cmbtrulz',
74 'evasion' => 'cmbtrulz',
75 'lorecrcl' => 'lorecrcl',
76 'lcbonus' => 'lorecrcl',
78 'primate' => 'levels',
79 'tutelary' => 'levels',
80 'mentora' => 'levels',
82 'archmstr' => 'levels',
83 'prncpln' => 'levels',
84 'imprvdsc' => 'imprvdsc',
85 'guardian' => 'imprvdsc',
86 'sunkght' => 'imprvdsc',
87 'sunlord' => 'imprvdsc',
88 'kaiwisdm' => 'kaiwisdm',
90 'numbered' => 'numbered',
91 'passing' => 'passing',
96 'crsumary' => 'crsumary',
97 'smevazn' => 'crsumary',
98 'crtable' => 'crtable',
100 'errata' => 'errata',
101 'errintro' => 'errata',
102 'errerr' => 'errata',
103 'footnotz' => 'footnotz',
104 'illstrat' => 'illstrat',
105 'primill' => 'illstrat',
106 'secill' => 'illstrat',
107 'license' => 'license',
108 'lic-pre' => 'license',
109 'lic-1' => 'license',
110 'lic-1-0' => 'license',
111 'lic-1-1' => 'license',
112 'lic-1-2' => 'license',
113 'lic-1-3' => 'license',
114 'lic-1-4' => 'license',
115 'lic-1-5' => 'license',
116 'lic-1-6' => 'license',
117 'lic-1-7' => 'license',
118 'lic-2' => 'license',
119 'lic-2-0' => 'license',
120 'lic-2-1' => 'license',
121 'lic-2-2' => 'license',
122 'lic-2-3' => 'license',
123 'lic-2-4' => 'license',
124 'lic-2-5' => 'license',
125 'lic-3' => 'license',
126 'lic-3-0' => 'license',
127 'lic-3-1' => 'license',
128 'lic-4' => 'license',
129 'lic-4-0' => 'license',
130 'lic-5' => 'license',
131 'lic-5-0' => 'license',
132 'lic-6' => 'license',
133 'lic-6-0' => 'license',
134 'lic-6-1' => 'license',
138 my $maxErrorCount = 0; # 0 means "no limit": the abort test only fires when this is greater than 0
142 my $checkNonASCII = 1; # NOTE(review): presumably cleared by --skip-ASCII-check; the only visible use is in a commented-out check - confirm
145 while( $#ARGV > -1 && $ARGV[ 0 ] =~ /^-/ ) {
146 if( $ARGV[ 0 ] eq "-e" && $#ARGV > 0 ) {
148 $maxErrorCount = shift @ARGV;
150 elsif( $ARGV[ 0 ] eq "-s" && $#ARGV > 0 ) {
152 $skipLines = shift @ARGV;
154 elsif( $ARGV[ 0 ] eq "-i" && $#ARGV > 0 ) {
156 $initials = shift @ARGV;
158 elsif( $ARGV[ 0 ] eq "--use-corr" ) {
162 elsif( $ARGV[ 0 ] eq '--skip-ASCII-check' ) {
166 elsif( $ARGV[ 0 ] eq '--language' ) {
168 $language = shift @ARGV;
173 my $currentSection = "_unknown";
175 while( my $line = <> ) {
176 my @section = ( $line =~ /<section[^>]+id="([^"]*)"/g );
177 if( $#section > 0 ) { die( "Multiple sections begin at line $lineNumber\n" ); }
178 elsif( $#section == 0 ) {
179 if( $section[ 0 ] =~ /^sect[[:digit:]]+$/ ) {
180 $currentSection = $section[ 0 ];
183 $currentSection = $sectionDocLookup{$section[ 0 ]};
187 if( $skipLines >= $lineNumber ) {
192 ##### Unescaped Characters
193 if( $line =~ /[\200-\377]/ ) {
194 if( $line =~ /\221/ ) { &printError( "ne", $currentSection, $lineNumber, "unescaped left single quotation mark(s)", "\221", "<quote>...</quote> or \&apos;" ); } # octal \221-\227 = bytes 0x91-0x97 (Windows-1252 curly quotes and dashes); each is reported with its markup replacement
195 if( $line =~ /\222/ ) { &printError( "ne", $currentSection, $lineNumber, "unescaped right single quotation mark(s)", "\222", "<quote>...</quote> or \&apos;" ); } # suggestion names the apostrophe entity, not a bare apostrophe (which is itself reported as an error)
196 if( $line =~ /\223/ ) { &printError( "ne", $currentSection, $lineNumber, "unescaped left double quotation mark(s)", "\223", "<quote>...</quote>" ); }
197 if( $line =~ /\224/ ) { &printError( "ne", $currentSection, $lineNumber, "unescaped right double quotation mark(s)", "\224", "<quote>...</quote>" ); }
198 if( $line =~ /\226/ ) { &printError( "ne", $currentSection, $lineNumber, "unescaped endash(es)", "\226", "&endash;" ); }
199 if( $line =~ /\227/ ) { &printError( "ne", $currentSection, $lineNumber, "unescaped emdash(es)", "\227", "&emdash;" ); }
200 # if( $checkNonASCII && $line =~ /(.{0,4}?)?([\x80-\xff])(.{0,4})?/ ) {
201 # &printError( "ne", $currentSection, $lineNumber, "unescaped non-ASCII character(s) in \"${1}[[HERE]]${3}\"; first found only", "$2" );
203 if( $line =~ /(.{0,4}?)?([\x80-\x9f])(.{0,4})?/ ) {
204 &printError( "ne", $currentSection, $lineNumber, "unsafe non-ASCII character(s) in \"${1}[[HERE]]${3}\"; first found only", "$2" );
207 if( $line =~ /'/ ) { &printError( "ne", $currentSection, $lineNumber, "unescaped apostrophe(s)", "'", "\&apos; or <quote>...</quote>" ); } # a bare apostrophe must become the apostrophe entity or a <quote> pair
208 if( $line =~ /`/ ) { &printError( "ne", $currentSection, $lineNumber, "backtick(s)", "`", "\&apos; or <quote>...</quote>" ); } # backticks get the same suggested replacements
212 if( $line =~ /\t/ ) { &printError( "ne", $currentSection, $lineNumber, "TAB character found; convert to equivalent SPACEs" ); }
215 if( $line =~ /\&\s/ ) { &printError( "ne", $currentSection, $lineNumber, "possible malformed ampersand or escape sequence", "&", "\&ampersand;" ); } # an ampersand followed by whitespace cannot start an entity; suggest the ampersand entity
218 if( $line =~ /\s-\s/ ) { &printError( "ne", $currentSection, $lineNumber, "probable malformed emdash", " - ", "\&emdash;" ); } # a lone hyphen with whitespace on both sides
219 if( $line =~ /(?<!\!)--(?!>)/ ) { &printError( "ne", $currentSection, $lineNumber, "probable malformed emdash", "--", "\&emdash;" ); } # "--" that is neither preceded by "!" (as in "<!--") nor followed by ">" (as in "-->")
222 if( $line =~ /([0-9])-([0-9]+)(?![^<]+>)/ ) { &printError( "ne", $currentSection, $lineNumber, "possible malformed endash", "$1-$2", "$1\&endash;$2" ); } # digit-hyphen-digits; the lookahead rejects matches followed by ">" with no "<" in between (likely inside a tag)
225 if( $line =~ /(\.\s*\.(\s*\.)?)/ ) { &printError( "ne", $currentSection, $lineNumber, "possible malformed ellipsis", "$1", "\&ellips; or \&lellips;" ); } # two or three periods, optionally separated by whitespace
226 if( $line =~ /(\&ellips;)([^<[:space:]])/ ) { &printError( "ne", $currentSection, $lineNumber, "\&ellips; without space afterwards", "$1$2", "\&ellips; $2" ); } # entity followed directly by something other than whitespace or "<"
227 if( $line =~ /([[:space:]]\&ellips;)/ ) { &printError( "ne", $currentSection, $lineNumber, "\&ellips; with preceding space", "$1", "\&ellips;" ); } # whitespace immediately before the entity
228 if( $line =~ /([^>])(\&lellips;)/ ) { &printError( "ne", $currentSection, $lineNumber, "possible \&lellips; used in place of \&ellips;", "$1$2", "$1\&ellips;" ); } # the "l" variant is accepted only directly after ">"
229 if( $line =~ /(>\&ellips;)/ ) { &printError( "ne", $currentSection, $lineNumber, "possible \&ellips; used in place of \&lellips;", "$1", ">\&lellips;" ); } # conversely, the plain variant directly after ">" is flagged
232 if( $line =~ m{(</?quote>)\1} ) { &printError( "ne", $currentSection, $lineNumber, "probable candidate for thinspace", "$1$1", "$1\&thinspace;$1" ); } # the same quote tag twice in a row (the backreference requires identical tags)
233 if( $line =~ m{(<quote>)(\&apos;|')} || $line =~ m{(\&apos;|')(</quote>)} ) { &printError( "ne", $currentSection, $lineNumber, "probable candidate for thinspace", "$1$2", "$1\&thinspace;$2" ); } # apostrophe (entity or literal) touching a quote tag; $1/$2 come from whichever alternative matched
236 if( $line =~ /(__+)/ ) { &printError( "ne", $currentSection, $lineNumber, "probable candidate for blankline", "$1", "\&blankline;" ); }
239 # It should be safe to assume that there will be a "]>" at the end of
240 # the internal DTD subset. Before the end of the internal DTD subset,
241 # "%" has special meaning and shouldn't be flagged.
242 if( $line =~ /]>/ ) { $endOfDTD = 1; }
243 if( $endOfDTD && $line =~ /\%/ ) { &printError( "ne", $currentSection, $lineNumber, "possible candidate for percent", "\%", "\&percent;" ); }
247 if( $line =~ m{([^.?!:);>]</((p)|(choice))>)} ) { &printError( "??", $currentSection, $lineNumber, "possible missing punctuation", "$1" ); } # </p> or </choice> whose preceding character is not one of . ? ! : ) ; >
248 if( $line =~ /((?<![iIeE]\.[eg])[.?!]\s+[a-z])/ ) { &printError( "??", $currentSection, $lineNumber, "possible bad initial capitalization", "$1" ); } # lowercase letter after sentence-ending punctuation; the lookbehind skips abbreviations such as "i.e." and "e.g."
249 if( $line =~ /([a-zA-Z][0-9][a-zA-Z])/ ) { &printError( "??", $currentSection, $lineNumber, "probable replacement of number for letter", "$1" ); } # a single digit sandwiched between two letters
250 if( $line =~ />[^<]*-[[:space:]]/ ) { &printError( "??", $currentSection, $lineNumber, "possible retained end-of-line hyphen(s)" ); } # hyphen followed by whitespace in text after a ">" with no "<" in between
252 ##### Obsolete Markup
254 if( $line =~ /\&lsquot;/ ) { &printError( "ne", $currentSection, $lineNumber, "probable obsolete markup", "\&lsquot;", "<quote>" ); } # the four directional quote entities map to opening/closing <quote> tags
255 if( $line =~ /\&rsquot;/ ) { &printError( "ne", $currentSection, $lineNumber, "probable obsolete markup", "\&rsquot;", "</quote>" ); }
256 if( $line =~ /\&ldquot;/ ) { &printError( "ne", $currentSection, $lineNumber, "probable obsolete markup", "\&ldquot;", "<quote>" ); }
257 if( $line =~ /\&rdquot;/ ) { &printError( "ne", $currentSection, $lineNumber, "probable obsolete markup", "\&rdquot;", "</quote>" ); }
258 if( $line =~ /\&quot;/ ) { &printError( "ne", $currentSection, $lineNumber, "possible obsolete markup", "\&quot;", "<quote> or </quote>" ); } # test the quote entity, not the literal double-quote character that delimits every XML attribute
259 if( $line =~ /(\&link.[^;]+;)/ ) { &printError( "ne", $currentSection, $lineNumber, "probable obsolete markup", "$1", "use <bookref.../> instead" ); } # link entities are replaced by <bookref.../> elements
260 if( $line =~ /\&([^[:space:];]+);/ ) { # exclude ";" from the name so $1 is exactly one entity name, not everything up to the last ";" of a whitespace-free run
261 unless( $1 =~ /^(?:link|inclusion)/ ) { # link* and inclusion* entities are exempt from the <ch.../> suggestion
262 &printError( "ne", $currentSection, $lineNumber, "possible obsolete markup", "\&$1\;", "<ch.$1/>" );
265 if( $line =~ /(<a([^>]*) class="footnote"(.*?)>)/ ) { &printError( "ne", $currentSection, $lineNumber, "obsolete markup", "$1", "<footref$2$3>" ); }
267 ##### Character Attributes
268 if( $line =~ /[^>]((CLOSE\s+)?COMBAT\sSKILL)/ || $line =~ /((CLOSE\s+)?COMBAT\sSKILL)[^<]/ ) {
269 &printError( "ne", $currentSection, $lineNumber, "possible missing markup", "$1", "<typ class=\"attribute\">$1</typ>" );
271 if( $line =~ /[^>](ENDURANCE)/ || $line =~ /(ENDURANCE)[^<]/ ) {
272 &printError( "ne", $currentSection, $lineNumber, "possible missing markup", "ENDURANCE", "<typ class=\"attribute\">ENDURANCE</typ>" );
274 if( $line =~ /[^>](WILLPOWER)/ || $line =~ /(WILLPOWER)[^<]/ ) {
275 &printError( "ne", $currentSection, $lineNumber, "possible missing markup", "WILLPOWER", "<typ class=\"attribute\">WILLPOWER</typ>" );
279 if( $line =~ /[^>](random[[:space:]]+number[[:space:]]+table)/i ) {
280 &printError( "ne", $currentSection, $lineNumber, "possible missing markup", "$1", "<a idref=\"random\">$1</a>" );
282 if( $line =~ /[^>](action[[:space:]]+charts?)/i ) {
283 &printError( "ne", $currentSection, $lineNumber, "possible missing markup", "$1", "<a idref=\"action\">$1</a>" );
287 if( $line =~ m{<!--(?!/?ERRTAG)} ) { &printError( "ne", $currentSection, $lineNumber, "XML comment found (check for editor comments)" ); }
288 if( $line =~ /([[:upper:]]{5,})/ &&
289 $` !~ /<signpost>$/ &&
290 (($language eq 'en' &&
295 ($language eq 'es' &&
297 $1 ne 'RESISTENCIA')) &&
302 { &printError( "ne", $currentSection, $lineNumber, "possible <signpost> needed", "$1", "<signpost>$1</signpost>" ); }
308 unless( $endOfDTD || $skipLines > 0 ) { print "End of document reached without finding end of internal DTD subset \"]>\".\n"; }
310 ################################################################################
313 my ($type, $section, $line, $message, $original, $corrected) = @_;
317 $report = "($type) $section: ";
318 if( defined $original ) { $report .= "$original "; }
319 if( defined $corrected ) { $report .= "-> $corrected "; }
320 $report .= "[$initials: $message <line $line>]\n";
323 $report = "line $line ($section): $message";
324 if( defined $original ) { $report .= " \"$original\""; }
325 if( defined $corrected ) { $report .= " ($corrected)"; }
332 if( $maxErrorCount > 0 && $errorCount > $maxErrorCount ) { die "Maximum number of errors ($maxErrorCount) exceeded. Quitting.\n"; }