2 # This script parses the illustrations list of a book in XML form and checks
3 # that the referred sections contain an illustration of the correct type, and
4 # that the section titles are correct.
9 # Revision 1.1 2005/08/23 19:32:28 angantyr
10 # First checked in version.
14 # 2005-08-22 Implemented handling of multiple instances of an illustration
15 # 2005-04-02 Fixed a bad regexp for section ids.
16 # 2003-03-23 First version.
# --- Command line handling -------------------------------------------------
# Exactly one argument is expected: the XML file to check. The first line
# takes it from @ARGV (or dies with a usage message); the second dies with
# the same usage message if a further (true) argument is present.
19 ($xmlfile = shift @ARGV) || die("usage: $0 <xml file>\n");
20 shift @ARGV && die("usage: $0 <xml file>\n");
# --- Main pass: once per illustrations list --------------------------------
# The two list ids handled are "primill" and "secill". For each one the same
# file is opened twice: ILL is used to read the illustrations list itself,
# SECTIONS is used to scan for the sections that the list entries refer to.
# NOTE(review): these are two-argument opens on bareword handles; the mode is
# therefore taken from $xmlfile itself. Three-argument open with lexical
# handles would be safer.
22 foreach $illtype ("primill", "secill") {
23 open(ILL, $xmlfile) || die("Cannot read input file $xmlfile\n");
24 open(SECTIONS, $xmlfile) || die("Cannot read input file $xmlfile\n");
# Advance ILL to the <section> line whose id attribute equals $illtype
# (matched case-insensitively). Hitting end of file first is fatal.
26 while(<ILL>) { # find illustrations list
27 last if (m/<section.*id="$illtype".*>/i);
29 die("No illustrations section!? Aborting.\n") if eof(ILL);
# Advance ILL further to the start of the list data. The loop body and the
# condition guarding the die() below are not visible in this listing --
# presumably the die fires when end of file is reached; TODO confirm.
30 while(<ILL>) { # find start of actual list
33 die("No data in the illustrations section!? Aborting.\n")
# Header for the problem report; every message printed below belongs to it.
35 print("No matches for these $illtype entries were found:\n");
39 # Now pick each paragraph (= illustrations) line
40 ILLLOOP: while(<ILL>) {
41 last if m|</data>|; # end of the illustrations list
42 next unless m/^\s*<li>/; # no list item = not an illustrations line
# Collect every <a idref="...">...</a> on this line as a flat list of
# (section id, link text) pairs: two elements per link.
44 @refpairs = m|<a idref="(\w*)">(.*?)</a>|g;
# Walk @refpairs two elements at a time: $sect is the referenced section id,
# $title the link text. The loop ends as soon as either of them is false
# (e.g. past the end of @refpairs). The initialisation of $i is not visible
# in this listing.
47 REFLOOP: while((@refpairn = @refpairs[$i++, $i++]) && (($sect, $title) = @refpairn) && $sect && $title) {
# Same section id as $lastref: another illustration in the same section.
# The body of this branch (and where $lastref/$repeated are updated) is not
# visible here.
48 if ($lastref eq $sect) { # Multiple illustrations in one section
# Search SECTIONS, from its current position, for the <section> tag with
# id $sect. The search is attempted up to twice ($tryagain = 1, then 0);
# on a hit the section's class attribute is remembered in $class and the
# search stops.
# NOTE(review): $sect is interpolated into the pattern unescaped; it comes
# from a \w* capture, so it should contain no regex metacharacters.
57 FINDSEC: foreach $tryagain (1, 0) {
58 while(<SECTIONS>) { # locate section
59 if (m/<section.*id="$sect">/) {
60 ($class) = m/class="(\S+)"/;
61 last FINDSEC; # Don't try again
# Diagnostics for a failed search. The conditions selecting between the two
# messages are not visible; presumably the first is printed before a retry
# from the top of the file and the second when the retry fails as well --
# TODO confirm against the full source.
68 print("$title ($sect) appears not to be in correct order!\n");
71 print("Could not find $title ($sect) at all!\n");
# Read on in SECTIONS and capture the text of the section's <title> element
# into $sectnumber (the surrounding loop control is not visible here).
79 while(<SECTIONS>) { # locate data
82 ($sectnumber) = m|<title>(.+)</title>|;
# Build the expected display name: sections whose class contains "numbered"
# (case-insensitive) are shown as "Section <title>", all others by the bare
# title text.
83 if ($class =~ m/numbered/i) {
84 $secttitle = "Section $sectnumber"
87 $secttitle = $sectnumber;
# Complain only if the list entry's text matches neither the constructed
# name nor the raw title text.
90 print("Illustrations entry section name '$title' does not match the\nsection name '$secttitle' of section with id $sect!\n")
91 if (("$secttitle" ne "$title") && ("$sectnumber" ne "$title"));
# Advance SECTIONS to the section's data (loop body not visible); running
# out of file first is fatal.
96 while(<SECTIONS>) { # locate data
99 die("Could not find any data in $title!? Aborting.\n") if eof(SECTIONS);
# Scan the section for an illustration of the kind expected for this list.
102 while(<SECTIONS>) { # find the illustration
104 #if (m/<section/) { # oh no, nested sections - we cannot handle this well
105 # seek(SECTIONS, -length(), 1); # give the next section a chance
106 # next REFLOOP; # give up on this section
# The condition below is the tail of a postfix-if statement whose leading
# part is not visible in this listing (presumably it records a match, cf.
# $foundit below -- TODO confirm). Accepted tags:
#   primill: a bare <illustration>, or class="float", or class="inline"
#   secill:  class="inline" or class="accent"
110 if ($illtype eq "primill" && m/<illustration>/ || # Map
111 $illtype eq "primill" && m/<illustration.+class="float".*>/ || # Normal large
112 $illtype eq "primill" && m/<illustration.+class="inline".*>/ || # Action Chart
113 $illtype eq "secill" && m/<illustration.+class="inline".*>/ || # Normal small
114 $illtype eq "secill" && m/<illustration.+class="accent".*>/); # In use still?
# Reaching end of file while scanning the section is fatal.
116 die("Could not find the end of $title!? Aborting.\n") if eof(SECTIONS);
# Report the outcome for this entry. $repeated and $foundit are maintained
# on lines not visible here; from their use, $foundit appears to count the
# matching illustrations found and $repeated the earlier references to the
# same section -- TODO confirm.
117 if (!$repeated && !$foundit) {
118 print("$secttitle ($sect) does not contain any matching illustration!\n");
120 elsif ($repeated >= $foundit) {
121 print("$secttitle ($sect) contains too few matching illustrations!\n");
# More than two elements in @refpairs means more than one link on this list
# line; rewind SECTIONS to the start of the file so the next lookup is not
# reported as out of order.
124 if (@refpairs > 2) { # This illustration appears multiple times
125 seek(SECTIONS, 0, 0); # Do not warn if next illustration is not in order
# Done with this illustrations list: release both handles on the file.
128 close(SECTIONS) || die("What's this?");
129 close(ILL) || die("What's this?");
131 print("Checking finished!\n");