#!/usr/bin/perl
# This script parses the errata list of a book in XML form and tries to
# locate the added or replaced strings in the sections where they are now
# supposed to exist. Things that are not found by the simple errata list
# parsing code are just skipped. Nesting frontmatter sections are also
# skipped.
#
# Usage: checkerrata.pl <xml file>
#
# Provenance: first checked in by Thomas Wolmer on 2005-08-23 as
# scripts/checkerrata.pl (git-svn trunk@93, commit 47dd5d41).
#
# Old History:
# 2005-08-23 Tried to improve handling of tags
# 2005-03-12 Treat &thinspace; correctly.
# 2003-03-16 Fixed reporting of false positives due to tags
# 2003-03-15 First version.
#
# NOTE(review): the copy of this script that was reviewed had been passed
# through an HTML-to-text filter that removed every <...> token: the
# readline operators on the two file handles, the "<xml file>" placeholder
# in the usage message and all XML tags inside the regular expressions.
# The tag patterns below are reconstructed from the surviving fragments and
# comments; each reconstruction is marked and must be confirmed against the
# pristine script and the book XML before this version is relied upon.

use strict;
use warnings;

# NOTE(review): the "<xml file>" placeholder is reconstructed; the reviewed
# copy printed only "usage: $0 ".
my $usage = "usage: $0 <xml file>\n";

my ($xmlfile, @extra) = @ARGV;
die($usage) unless defined $xmlfile && length $xmlfile;
die($usage) if @extra;

# Two independent read positions in the same file: one walks the errata
# list, the other walks the book sections the errata entries refer to.
open(my $errata_fh, '<', $xmlfile)
    or die("Cannot read input file $xmlfile\n");
open(my $sections_fh, '<', $xmlfile)
    or die("Cannot read input file $xmlfile\n");

# Advance $fh until a line matches $pattern. Returns 1 when such a line was
# consumed and 0 when end of file was reached first. Using the return value
# (rather than eof() afterwards) avoids a false failure when the matching
# line happens to be the last line of the file.
sub skip_until {
    my ($fh, $pattern) = @_;
    while (my $line = <$fh>) {
        return 1 if $line =~ $pattern;
    }
    return 0;
}

# Translate an errata string into the form it has inside a section:
# &[lr][sd]quot; entities become <quote> / </quote> tags, and a leading or
# trailing &thinspace; is ignored. Returns the translated copy.
sub normalize_entry {
    my ($string) = @_;
    # NOTE(review): the "<quote>" replacement is reconstructed as the
    # counterpart of the surviving "<\/quote>" on the next line.
    $string =~ s/\&l[sd]quot;/<quote>/g;
    $string =~ s/\&r[sd]quot;/<\/quote>/g;
    $string =~ s/^\&thinspace;//;
    $string =~ s/\&thinspace;$//;
    return $string;
}

# Print one report line for every string in @$strings that does not occur
# literally in $text. $kind is the label that starts the line.
sub report_missing {
    my ($kind, $strings, $text, $sect) = @_;
    for my $string (@{$strings}) {
        my $wanted = normalize_entry($string);
        if ($text !~ m/\Q$wanted\E/) {
            print("$kind \"$wanted\" in $sect\n");
        }
    }
    return;
}

# Find the errata list, then the start of its actual data.
# NOTE(review): both patterns are reconstructed (originals were stripped).
skip_until($errata_fh, qr/<section\b[^>]*\bid="errata"/)
    or die("No errata section!? Aborting.\n");
skip_until($errata_fh, qr/<data>/)
    or die("No data in the errata section!? Aborting.\n");

print("No matches for these errata entries were found:\n");

# Now pick each paragraph (= errata) line
ERRATALOOP: while (my $entry = <$errata_fh>) {
    # The end-of-list test must come before the paragraph filter: a closing
    # tag on a line of its own is not a paragraph line, so with the tests in
    # the opposite order the loop would never stop at the end of the list.
    # NOTE(review): "</data>" is reconstructed.
    last ERRATALOOP if $entry =~ m{</data>};
    next ERRATALOOP unless $entry =~ m/^\s*<p>/;   # not an errata line

    # NOTE(review): the three patterns below are reconstructed around the
    # surviving fragments "\(.*\)", ".*? with (.*?)" and "[Aa]dded (.*?...)".
    my ($sect) = $entry =~ m{\(<a\b[^>]*\bidref="([^"]+)"[^>]*>.*?</a>\)};
    my @reps   = $entry =~ m{<quote>.*?</quote> with <quote>(.*?)</quote>}g;
    my @adds   = $entry =~ m{[Aa]dded <quote>(.*?)</quote>}g;
    next ERRATALOOP unless @reps || @adds;
    next ERRATALOOP unless defined $sect;          # unparsable entry: skip

    # Now find section and append all contents into one string. The section
    # handle only ever moves forward, so errata entries must be listed in
    # the same order as the sections appear in the file.
    skip_until($sections_fh, qr/<section\b[^>]*\bid="\Q$sect\E"/)
        or die("Could not find section $sect!? (This might be because the errata\nentry for it is not placed in the correct section order.) Aborting.\n");
    skip_until($sections_fh, qr/<data>/)
        or die("Could not find any data in $sect!? Aborting.\n");

    my $text       = "";
    my $saw_ending = 0;
    while (my $line = <$sections_fh>) {            # grab all section contents
        if ($line =~ m{</data>}) {
            $saw_ending = 1;
            last;
        }
        # NOTE(review): the statement at this position was lost entirely.
        # Skipping the whole errata entry when a nested section starts is
        # inferred from the header comment and from the otherwise unused
        # ERRATALOOP label - confirm against the pristine script.
        next ERRATALOOP if $line =~ m/<section\b/;
        # remove <link-text> tags
        # since they are commented out in the errata entries
        $line =~ s{</?link-text>}{}g;
        $text .= $line;
    }
    die("Could not find the end of $sect!? Aborting.\n") unless $saw_ending;

    report_missing("Replacement", \@reps, $text, $sect);
    report_missing("Addition",    \@adds, $text, $sect);
}

close($errata_fh);
close($sections_fh);

print("Checking finished!\n");