#!/usr/bin/perl
# This script parses the illustrations list of a book in XML form and checks
# that the referred sections contain an illustration of the correct type, and
# that the section titles are correct.
#
# Usage: checkill.pl <xmlfile>
#
# Provenance: scripts/checkill.pl as added by commit
# 60c406de9fd3fb63d9b86879e4c78ac148dec500 (Thomas Wolmer, 2005-08-23,
# "First checked in version.", svn trunk@92 of projectaon.org/data).
#
# NOTE(review): the copy this file was restored from had been run through an
# HTML tag stripper, which removed every "<...>" sequence: the readline
# operators and the bodies of most regular expressions.  The readline
# operators are restored from the surrounding eof() checks.  The regular
# expressions in the "Patterns" table below are reconstructions and must be
# confirmed against the upstream script before this is relied upon.

# $Id$

# $Log$
# Revision 1.1 2005/08/23 19:32:28 angantyr
# First checked in version.
#

# Old History:
# 2005-08-22 Implemented handling of multiple instances of an illustration
# 2005-04-02 Fixed a bad regexp for section ids.
# 2003-03-23 First version.

use strict;
use warnings;

# ---------------------------------------------------------------------------
# Patterns
# ---------------------------------------------------------------------------

# Strongly implied by what survived in the damaged copy.
my $LIST_ITEM_RE = qr/^\s*<li>/;               # no list item = not an illustrations line
my $TITLE_RE     = qr/<title>/;                # line carrying the section title
my $TITLE_CAP_RE = qr|<title>(.+)</title>|;    # captures the title text

# NOTE(review): reconstructed -- confirm against upstream.
my $LIST_START_RE = qr/<ul/;                          # start of the actual list
my $LIST_END_RE   = qr|</ul>|;                        # end of the illustrations list
my $REF_PAIR_RE   = qr|<a idref="(.*?)">(.*?)</a>|;   # (section id, link text)
my $DATA_START_RE = qr/<data>/;                       # start of a section's data
my $DATA_END_RE   = qr|</data>|;                      # end of a section's data

# NOTE(review): placeholder.  The damaged copy shows five alternatives here
# (commented "Map", "Normal large" and "Action Chart" for primill; "Normal
# small" and "In use still?" for secill) but none of their patterns survived.
# Replace these with the upstream patterns.
my %ILLUSTRATION_RE = (
    primill => qr/<illustration\s+class="float"/,
    secill  => qr/<illustration\s+class="inline"/,
);

# Builds the pattern that finds the opening tag of the section with the given
# id.  The id is quoted so that it is matched literally, and the closing
# double quote keeps "sect1" from matching "sect10".
# NOTE(review): reconstructed -- confirm against upstream.
sub section_pattern {
    my ($id) = @_;
    return qr/<section.*id="\Q$id\E"/i;
}

# Reads $fh forward until a line matches $pattern.
# Returns the matching line, or nothing (undef) if end of file came first.
# An explicit return value is used instead of testing eof() afterwards, so a
# marker sitting on the very last line of the file is still reported as found.
sub skip_to {
    my ($fh, $pattern) = @_;
    while (defined(my $line = <$fh>)) {
        return $line if $line =~ $pattern;
    }
    return;
}

# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------

@ARGV == 1 or die("usage: $0 <xmlfile>\n");
my ($xmlfile) = @ARGV;

foreach my $illtype ("primill", "secill") {
    # Two independent read positions in the same file: $ill walks the
    # illustrations list while $sections walks the referred sections.
    open(my $ill, '<', $xmlfile)
        || die("Cannot read input file $xmlfile\n");
    open(my $sections, '<', $xmlfile)
        || die("Cannot read input file $xmlfile\n");

    # Find the illustrations list of this type ...
    defined skip_to($ill, section_pattern($illtype))
        or die("No illustrations section!? Aborting.\n");
    # ... and the start of the actual list.
    defined skip_to($ill, $LIST_START_RE)
        or die("No data in the illustrations section!? Aborting.\n");
    print("No matches for these $illtype entries were found:\n");

    my $lastref  = "";    # id of the section checked most recently
    my $repeated = 0;     # consecutive earlier references to that same section

    # Now pick each list item (= illustrations) line
    ILLLOOP: while (defined(my $entry = <$ill>)) {
        last ILLLOOP if $entry =~ $LIST_END_RE;
        next ILLLOOP unless $entry =~ $LIST_ITEM_RE;

        # Flat list: id, title, id, title, ...
        my @refpairs = $entry =~ /$REF_PAIR_RE/g;
        my @pending  = @refpairs;

        REFLOOP: while (@pending) {
            my ($sect, $title) = splice(@pending, 0, 2);
            last REFLOOP unless $sect && $title;

            if ($lastref eq $sect) {    # Multiple illustrations in one section
                seek($sections, 0, 0);  # we already read past it; start over
                $repeated++;
            }
            else {
                $repeated = 0;
            }

            # Now find the section: first from the current position (which
            # succeeds when the list is in document order), then once more
            # from the top of the file.
            my $class;
            FINDSEC: foreach my $tryagain (1, 0) {
                my $header = skip_to($sections, section_pattern($sect));
                if (defined $header) {
                    ($class) = $header =~ m/class="(\S+)"/;
                    last FINDSEC;       # Don't try again
                }

                # Start over
                seek($sections, 0, 0);
                if ($tryagain) {
                    print("$title ($sect) appears not to be in correct order!\n");
                }
                else {
                    print("Could not find $title ($sect) at all!\n");
                    next REFLOOP;
                }
            }
            $class = '' unless defined $class;

            $lastref = $sect;

            # Compare the section's own title with the list entry's text.
            my $secttitle  = '';
            my $title_line = skip_to($sections, $TITLE_RE);
            if (defined $title_line) {
                my ($sectnumber) = $title_line =~ $TITLE_CAP_RE;
                $sectnumber = '' unless defined $sectnumber;

                # Numbered sections carry only their number as title.
                $secttitle = ($class =~ m/numbered/i)
                    ? "Section $sectnumber"
                    : $sectnumber;

                print("Illustrations entry section name '$title' does not match the\nsection name '$secttitle' of section with id $sect!\n")
                    if (($secttitle ne $title) && ($sectnumber ne $title));
            }

            # Locate the section's data.  This also fails when no title line
            # was found above, since $sections is then already at end of file.
            defined skip_to($sections, $DATA_START_RE)
                or die("Could not find any data in $title!? Aborting.\n");

            # Count the matching illustrations up to the end of the data.
            my $foundit = 0;
            my $saw_end = 0;
            while (defined(my $body = <$sections>)) {
                if ($body =~ $DATA_END_RE) {
                    $saw_end = 1;
                    last;
                }
                $foundit++ if $body =~ $ILLUSTRATION_RE{$illtype};
            }
            $saw_end
                or die("Could not find the end of $title!? Aborting.\n");

            if (!$repeated && !$foundit) {
                print("$secttitle ($sect) does not contain any matching illustration!\n");
            }
            elsif ($repeated >= $foundit) {
                # The Nth reference to a section needs at least N illustrations.
                print("$secttitle ($sect) contains too few matching illustrations!\n");
            }
        } # REFLOOP

        if (@refpairs > 2) {        # This illustration appears multiple times
            seek($sections, 0, 0);  # Do not warn if next illustration is not in order
        }
    } # ILLLOOP

    close($sections) || die("What's this?");
    close($ill)      || die("What's this?");
}
print("Checking finished!\n");