2 # This script parses the errata list of a book in XML form and tries to
3 # locate the added or replaced strings in the sections where they are now
4 # supposed to exist. Things that are not found by the simple errata list
5 # parsing code are just skipped. Nesting frontmatter sections are also
9 # 2005-08-23 Tried to improve handling of <link-text> tags
10 # 2005-03-12 Treat &thinspace; correctly.
11 # 2003-03-16 Fixed reporting of false positives due to <link-text> tags
12 # 2003-03-15 First version.
# Command line handling: exactly one non-empty argument, the XML file to
# check, is accepted. Counting the arguments (instead of testing them for
# truth) means a file literally named "0" is accepted and a stray second
# argument of "0" is rejected.
(@ARGV == 1 && length($ARGV[0])) || die("usage: $0 <xml file>\n");
$xmlfile = shift @ARGV;

# The same file is opened twice so that two independent read positions
# exist: ERRATA walks the errata list while SECTIONS walks the sections the
# errata entries refer to. The three-argument form of open keeps characters
# in the file name from being interpreted as an open mode, and $! reports
# why the open failed.
open(ERRATA, '<', $xmlfile) || die("Cannot read input file $xmlfile: $!\n");
open(SECTIONS, '<', $xmlfile) || die("Cannot read input file $xmlfile: $!\n");
# Advance the ERRATA handle to the errata section: stop at the first line
# that contains a <section ...> tag carrying id="errerr".
22 while(<ERRATA>) { # find errata list
23 last if m/<section.*id="errerr".*>/;
# An exhausted handle at this point is treated as "no errata section".
25 die("No errata section!? Aborting.\n") if eof(ERRATA);
# Second scan on the same handle, continuing from the section start.
# NOTE(review): the test that ends this scan is not visible in this
# excerpt - presumably it stops at the opening tag of the list's data;
# confirm against the complete file.
26 while(<ERRATA>) { # find start of actual list
# An exhausted handle here is treated as "errata section has no data".
29 die("No data in the errata section!? Aborting.\n") if eof(ERRATA);
# Heading for the report; the mismatch lines printed further down follow it.
31 print("No matches for these errata entries were found:\n");
33 # Now pick each paragraph (= errata) line
# Each iteration reads one line from ERRATA into $_ and, for lines that look
# like errata entries, collects the text of the section the entry points to.
34 ERRATALOOP: while(<ERRATA>) {
# Only lines of the form  <p>(<a idref="...">...</a>)  (optionally indented)
# are processed.
# NOTE(review): this filter requires idref to be the first attribute of <a>,
# while the capture further down also allows a leading id="..." attribute;
# lines of that second form are skipped here and never reach the capture.
35 next unless m/^\s*<p>\(<a\s+idref=".+">.+<\/a>\)/; # no paragraph = not an errata line
# NOTE(review): the filter above runs first, so a line holding only </data>
# is skipped by "next" and never reaches this test; the loop then ends at
# </data> only when that tag shares a line with an errata paragraph.
36 last if m|</data>|; # end of the errata list
# $sect receives the idref value, i.e. the id of the section the entry
# refers to.
37 ($sect) = m|\(<a (?:id="\w+?" )?idref="(\w+?)">.*</a>\)|;
38 defined($sect) || die("Failed on line: $_");
# @reps: for every "<quote>old</quote> with <quote>new</quote>" on the line,
# the "new" text. @adds: for every "Added <quote>text</quote>" (or "added"),
# the quoted text.
39 @reps = m|<quote>.*?</quote> with <quote>(.*?)</quote>|g;
40 @adds = m|[Aa]dded <quote>(.*?(?=</quote>))</quote>|g;
# Entries that describe neither a replacement nor an addition are skipped.
41 next unless @reps || @adds;
43 # Now find section and append all contents into one string
# The scan continues from wherever SECTIONS was left by the previous entry;
# no visible code moves it back to the start of the file, which is why the
# error message below mentions section order.
44 while(<SECTIONS>) { # locate section
45 last if m/<section.*id=\"$sect\">/;
47 die("Could not find section $sect!? (This might be because the errata\nentry for it is not placed in the correct section order.) Aborting.\n") if eof(SECTIONS);
# NOTE(review): the test that ends this scan is not visible in this excerpt -
# presumably it stops at the section's opening data tag; confirm.
48 while(<SECTIONS>) { # locate data
51 die("Could not find any data in $sect!? Aborting.\n") if eof(SECTIONS);
53 while(<SECTIONS>) { # grab all section contents
55 if (m/<section/) { # oh no, nested sections - we cannot handle this well
# length() with no argument measures $_, the line just read, so this seek
# moves SECTIONS back to the start of that line and the <section ...> tag
# is read again when the next errata entry is looked up.
56 seek(SECTIONS, -length(), 1); # give the next section a chance
57 next ERRATALOOP; # give up on this section
59 # Before adding the text to the section blob, modify <link-text> tags
60 # since they are commented out in the errata entries
# Rewrites <link-text> to <!--link-text--> and </link-text> to
# <!--/link-text--> in the current line.
61 s|<(/?)link-text>|<!--${1}link-text-->|g;
62 # The following line substitutes a colon for </enemy> tag since the
63 # colon is not in the xml but is added during the conversion process.
64 # Some enemies, notably in dotd.xml, were missing a colon after the
65 # enemy declaration, which we have added in the PA editions.
# NOTE(review): the substitution described above and the statement that
# appends the line to the section text are not visible in this excerpt.
69 die("Could not find the end of $sect!? Aborting.\n") if eof(SECTIONS);
71 # The replacement may contain left/right quotes which in the sections are
72 # <quote> thingies. Translate these. Also ignore &thinspace; first and
73 # last in the replacements.
74 # Refactor the duplicated code below some day!
# Normalise every replacement string and report those that do not occur in
# $text. The substitutions act on $rep, the foreach loop variable.
# NOTE(review): $text is not assigned anywhere in the visible lines -
# presumably it holds the section contents gathered above; confirm.
75 foreach $rep (@reps) {
76 # Contemporary character entities
# Opening single/double quote elements become <quote>, closing ones become
# </quote>. The "." after "ch" is an unescaped regex dot, so it matches any
# single character at that position.
77 $rep =~ s/<ch.l[sd]quot\/>/<quote>/g;
78 $rep =~ s/<ch.r[sd]quot\/>/<\/quote>/g;
# Drop a thin space at the very start and at the very end of the string.
79 $rep =~ s/^<ch.thinspace\/>//g;
80 $rep =~ s/<ch.thinspace\/>$//g;
81 # (Obsolete) character elements
# Same translations for the older &lsquot; / &ldquot; / &rsquot; / &rdquot;
# and &thinspace; spellings.
82 $rep =~ s/\&l[sd]quot;/<quote>/g;
83 $rep =~ s/\&r[sd]quot;/<\/quote>/g;
84 $rep =~ s/^\&thinspace;//g;
85 $rep =~ s/\&thinspace;$//g;
# \Q...\E quotes regex metacharacters, so this is a literal substring test.
87 if ($text !~ m/\Q$rep\E/) {
88 print("Replacement \"$rep\" in $sect\n");
# Same normalisation and literal substring test for the added strings.
91 foreach $add (@adds) {
92 $add =~ s/<ch.l[sd]quot\/>/<quote>/g;
93 $add =~ s/<ch.r[sd]quot\/>/<\/quote>/g;
94 $add =~ s/^<ch.thinspace\/>//g;
95 $add =~ s/<ch.thinspace\/>$//g;
96 $add =~ s/\&l[sd]quot;/<quote>/g;
97 $add =~ s/\&r[sd]quot;/<\/quote>/g;
98 $add =~ s/^\&thinspace;//g;
99 $add =~ s/\&thinspace;$//g;
101 if ($text !~ m/\Q$add\E/) {
102 print("Addition \"$add\" in $sect\n");
# Closing line of the report.
106 print("Checking finished!\n");