5 # mergecorrhtml [options] inputHTML [inputCorrections]
7 # -u include with unspecified bookcode
10 # Merges _sorted_ HTML correction lists: one in an HTML file, one bare list. It
11 # will dump any remaining corrections in the lists after completion. The chief
12 # reason this would happen is that the lists aren't sorted. The correction
13 # list in the input HTML should be surrounded by the following markers on lines
16 # <!--mergecorrhtml:BEGIN-->
18 # <!--mergecorrhtml:END-->
20 # Typical usage would be in concert with corrtohtml and sortcorrhtml:
22 # corrtohtml <correctionFile> | sortcorrhtml | mergecorrhtml -b <book> <html>
24 # Output will appear on standard out which would usually be redirected to file.
26 ################################################################################
# Program name; interpolated into the usage text and into the Error/Warning
# messages emitted further down.
30 my $programName = 'mergecorrhtml';
# Usage text: printed for --help and passed to die when a required argument
# is missing.
# NOTE(review): no "-b" line is visible in this text although the option loop
# handles -b -- confirm the option is documented here.
31 my $usage = "$programName [options] inputHTML\n" .
33 "\t-u include unspecified book\n" .
34 "\t-v verbose reporting\n";
41 ################################################################################
42 # Process command line
# Loop guard: the option loop below keeps running only while this is false.
# NOTE(review): the statement that sets it is not visible in this view.
44 my $optsProcessed = 0;
# Suffix placed after the program name in diagnostics; empty by default.
47 my $bookCodeReport = "";
# Set by -u; consulted later when $corrRegex is built.
48 my $includeUnspecifiedBook = 0;
# Consume leading options from @ARGV one item at a time.
51 while( $#ARGV > -1 && not $optsProcessed ) {
52 my $commandLineItem = shift @ARGV;
# -b takes the next argument as the book code. Because "or" tests truthiness,
# a missing value -- and also a value of "" or "0" -- dies with the usage text.
53 if( $commandLineItem eq "-b" ) {
54 $bookCode = shift @ARGV or die $usage;
56 elsif( $commandLineItem eq "-u" ) {
57 $includeUnspecifiedBook = 1;
# -v: the body of this branch is not visible in this view.
59 elsif( $commandLineItem eq "-v" ) {
# --help: print the usage text; exit runs only if print reports success.
62 elsif( $commandLineItem eq "--help" ) {
63 print $usage and exit;
# Put the item back on the front of @ARGV so it is still available as a
# positional argument (presumably this is the first non-option item).
66 unshift @ARGV, $commandLineItem;
# Diagnostics suffix of the form " [bookcode]".
# NOTE(review): any condition guarding this assignment is not visible here.
72 $bookCodeReport = " [$bookCode]";
# The next remaining argument is the input HTML file; missing => usage error.
75 $inFile = shift @ARGV or die $usage;
# Tail pattern: lazily one or more non-'#' characters, then '#' followed by
# digits; the digits are captured. When appended after $htmlRegex or
# $corrRegex (three groups each) that capture becomes $4.
77 $issueRegex = qr{[^#]+?(?:#([[:digit:]]+))};
# Entry line in the input HTML: an opening <div ...> tag at line start
# (group 1), an empty group 2, then "<a", group 3, and 'href="'.
79 $htmlRegex = qr{^(<div.*?>)()<a(.*?)href="}; # unused capture to match other regex below
# Pattern used to select lines from the corrections input:
#   - empty book code: identical to $htmlRegex;
#   - book code with -u: a "<!-- bookcode -->" comment (group 2) may follow
#     the opening <div>, but is optional;
#   - book code without -u: that comment is required.
# $bookCode is interpolated unescaped, so any regex metacharacters in it are
# treated as pattern syntax.
80 if( $bookCode eq "" ) {
81 $corrRegex = $htmlRegex;
83 elsif( $includeUnspecifiedBook ) {
84 $corrRegex = qr{^(<div.*?>)(<!--[[:space:]]*${bookCode}[[:space:]]*-->)?<a(.*?)href="};
87 $corrRegex = qr{^(<div.*?>)(<!--[[:space:]]*${bookCode}[[:space:]]*-->)<a(.*?)href="};
# Section heading line: starts with "<div", whitespace, then class="section".
89 $markerRegex = qr{^<div[[:space:]]+?class="section".*$};
91 ################################################################################
92 # Read in HTML into which we're merging and correction HTML
# Open the input HTML for reading; failure is fatal.
# NOTE(review): this is a two-argument open with a bareword handle, so the
# file name is parsed together with the mode string -- consider the
# three-argument form.
94 open( INFILE, "<$inFile" ) or die( "Error ($programName)$bookCodeReport: unable to open \"$inFile\" for read: $!\n" );
# Loop while @lines is non-empty and its first element is not the BEGIN marker
# on a line by itself (surrounding whitespace allowed). The loop body and the
# code that fills @lines are not visible in this view.
100 while( $#lines > -1 && $lines[ 0 ] !~ m{^[[:space:]]*<!--mergecorrhtml:BEGIN-->[[:space:]]*$} ) {
# Print and remove the head of @lines if any remain -- presumably the BEGIN
# marker line itself.
103 print shift @lines if( $#lines > -1 );
# Loop until the END marker line (or until @lines is exhausted), classifying
# each line at the head of @lines.
109 while( $#lines > -1 && $lines[ 0 ] !~ m{^[[:space:]]*<!--mergecorrhtml:END-->[[:space:]]*$} ) {
# Entry lines are moved onto @inHTML for the merge.
110 if( $lines[ 0 ] =~ m/$htmlRegex/ ) {
111 push( @inHTML, shift @lines );
# Section heading lines and blank lines are recognised; what is done with
# them is not visible in this view.
113 elsif( $lines[ 0 ] =~ m/$markerRegex/ ) {
116 elsif( $lines[ 0 ] =~ m/^[[:space:]]*$/ ) {
# Anything else between the markers is a fatal error.
120 die( "Error ($programName)$bookCodeReport: unrecognized input HTML: " . $lines[ 0 ] . "\n" );
# Read the corrections via <> (files still named in @ARGV, otherwise standard
# input) and keep only the lines that match $corrRegex.
125 while( my $corr = <> ) {
126 push( @inCorr, $corr ) if( $corr =~ m{$corrRegex} );
129 ################################################################################
# Section names, in the order returned by getSectSortOrder.
132 my @sectSortOrder = &getSectSortOrder( );
# Walk the sections in that order, consuming matching entries from the heads
# of @inHTML and @inCorr.
134 foreach my $section (@sectSortOrder) {
# Emit the heading for this section.
136 print "<div class=\"section\"><a name=\"$section\">$section</a></div>\n";
# Existing entries whose href targets "<section>.htm" and carries a
# "#<digits>" issue number. $section is interpolated into the pattern unescaped.
137 while( $#inHTML > -1 && $inHTML[ 0 ] =~ m/$htmlRegex$section\.htm${issueRegex}/ ) {
# Corrections for the same section whose issue number ($4, the $issueRegex
# capture) equals $issue.
# NOTE(review): the assignment of $issue is not visible in this view;
# presumably it is the issue number of the current input HTML entry.
139 while( $#inCorr > -1 && $inCorr[ 0 ] =~ m/$corrRegex$section\.htm${issueRegex}/ && $issue eq $4 ) {
140 my $corr = shift @inCorr;
# Warn when the correction does not have, after its first colon and optional
# whitespace, a <div> whose class value contains "cm".
142 if( $corr !~ m{^.+?:[[:space:]]*<div[^>]+?class="[^"]*cm} ) { warn( "Warning ($programName)$bookCodeReport: discarding data in issue comment: $corr" ); }
# Repeatedly strip everything up to and including the next such "cm" <div>
# from the front of $corr; the stripped <div>...</div> is captured in $1.
143 while( $corr =~ s{^.*?(<div[^>]+?class="[^"]*cm[^>]+>.*?</div>)}{} ) {
# Insert $comm just before the closing </div> at the end of the current input
# HTML entry. NOTE(review): the assignment of $comm is not visible here.
146 $inHTML[ 0 ] =~ s{</div>$}{$comm</div>}
# Remaining corrections for this section, matched without requiring an issue
# number.
150 while( $#inCorr > -1 && $inCorr[ 0 ] =~ m/$corrRegex$section\.htm/ ) {
151 my $corr = shift @inCorr;
# Rebuild the matched prefix from groups 1 and 3 only, dropping group 2 (the
# book-code comment, when present).
152 $corr =~ s{$corrRegex}{$1<a$3href="};
# Fill the value of $issue into the first "#:" in the correction.
154 $corr =~ s{#:}{#$issue:};
159 ################################################################################
160 # Print the remainder of the input HTML and corrections
# Entries still left in @inHTML after the section loop: warn, quoting the
# first leftover entry. Any handling beyond the warning is not visible here.
162 if( $#inHTML > -1 ) {
163 warn( "Warning ($programName)$bookCodeReport: input HTML probably out of order\n\tor unrecognized section--error near:\n\t" . $inHTML[ 0 ] . "\n" );
# Same check for corrections still left in @inCorr.
166 if( $#inCorr > -1 ) {
167 warn( "Warning ($programName)$bookCodeReport: input corrections probably out of order\n\tor unrecognized section--error near:\n\t" . $inCorr[ 0 ] . "\n" );
174 ################################################################################
175 ################################################################################
178 sub getSectSortOrder {