+++ /dev/null
-#!/usr/bin/perl -w
-# This script parses the illustrations list of a book in XML form and checks
-# that the referred sections contain an illustration of the correct type, and
-# that the section titles are correct.
-
-# $Id$
-
-# $Log$
-# Revision 1.1 2005/08/23 19:32:28 angantyr
-# First checked in version.
-#
-
-# Old History:
-# 2005-08-22 Implemented handling of multiple instances of an illustration
-# 2005-04-02 Fixed a bad regexp for section ids.
-# 2003-03-23 First version.
-
-
-($xmlfile = shift @ARGV) || die("usage: $0 <xml file>\n");
-shift @ARGV && die("usage: $0 <xml file>\n");
-
-foreach $illtype ("primill", "secill") {
- open(ILL, $xmlfile) || die("Cannot read input file $xmlfile\n");
- open(SECTIONS, $xmlfile) || die("Cannot read input file $xmlfile\n");
-
- while(<ILL>) { # find illustrations list
- last if (m/<section.*id="$illtype".*>/i);
- }
- die("No illustrations section!? Aborting.\n") if eof(ILL);
- while(<ILL>) { # find start of actual list
- last if m/<data>/;
- }
- die("No data in the illustrations section!? Aborting.\n")
- if eof(ILL);
- print("No matches for these $illtype entries were found:\n");
-
- $lastref = "";
- $repeated = 0;
- # Now pick each paragraph (= illustrations) line
- ILLLOOP: while(<ILL>) {
- last if m|</data>|; # end of the illustrations list
- next unless m/^\s*<li>/; # no list item = not an illustrations line
-
- @refpairs = m|<a idref="(\w*)">(.*?)</a>|g;
-
- $i = 0;
- REFLOOP: while((@refpairn = @refpairs[$i++, $i++]) && (($sect, $title) = @refpairn) && $sect && $title) {
- if ($lastref eq $sect) { # Multiple illustrations in one section
- seek(SECTIONS, 0, 0);
- $repeated++;
- }
- else {
- $repeated = 0;
- }
-
- # Now find section
- FINDSEC: foreach $tryagain (1, 0) {
- while(<SECTIONS>) { # locate section
- if (m/<section.*id="$sect">/) {
- ($class) = m/class="(\S+)"/;
- last FINDSEC; # Don't try again
- }
- }
-
- # Start over
- seek(SECTIONS, 0, 0);
- if ($tryagain) {
- print("$title ($sect) appears not to be in correct order!\n");
- }
- else {
- print("Could not find $title ($sect) at all!\n");
- next REFLOOP;
- }
- }
-
- $lastref = $sect;
-
- my $secttitle;
- while(<SECTIONS>) { # locate data
- if (m/<title>/) {
- my $sectnumber;
- ($sectnumber) = m|<title>(.+)</title>|;
- if ($class =~ m/numbered/i) {
- $secttitle = "Section $sectnumber"
- }
- else {
- $secttitle = $sectnumber;
- }
-
- print("Illustrations entry section name '$title' does not match the\nsection name '$secttitle' of section with id $sect!\n")
- if (("$secttitle" ne "$title") && ("$sectnumber" ne "$title"));
- last;
- }
- }
-
- while(<SECTIONS>) { # locate data
- last if m/<data>/;
- }
- die("Could not find any data in $title!? Aborting.\n") if eof(SECTIONS);
-
- $foundit = 0;
- while(<SECTIONS>) { # find the illustration
- last if m|</data>|;
- #if (m/<section/) { # oh no, nested sections - we cannot handle this well
- # seek(SECTIONS, -length(), 1); # give the next section a chance
- # next REFLOOP; # give up on this section
- #}
-
- $foundit++
- if ($illtype eq "primill" && m/<illustration>/ || # Map
- $illtype eq "primill" && m/<illustration.+class="float".*>/ || # Normal large
- $illtype eq "primill" && m/<illustration.+class="inline".*>/ || # Action Chart
- $illtype eq "secill" && m/<illustration.+class="inline".*>/ || # Normal small
- $illtype eq "secill" && m/<illustration.+class="accent".*>/); # In use still?
- }
- die("Could not find the end of $title!? Aborting.\n") if eof(SECTIONS);
- if (!$repeated && !$foundit) {
- print("$secttitle ($sect) does not contain any matching illustration!\n");
- }
- elsif ($repeated >= $foundit) {
- print("$secttitle ($sect) contains too few matching illustrations!\n");
- }
- } # REFLOOP
- if (@refpairs > 2) { # This illustration appears multiple times
- seek(SECTIONS, 0, 0); # Do not warn if next illustration is not in order
- }
- } # ILLOOP
- close(SECTIONS) || die("What's this?");
- close(ILL) || die("What's this?");
-}
-print("Checking finished!\n");