+++ /dev/null
-#!/usr/bin/perl -w
-#
-# mergecorrhtml.pl
-#
-# mergecorrhtml [options] inputHTML [inputCorrections]
-# -b bookcode
-# -u include with unspecified bookcode
-# -v verbose reporting
-#
-# Merges _sorted_ HTML correction lists: one in an HTML file, one bare list. It
-# will dump any corrections remaining in the lists after completion. The chief
-# reason this would happen is that the lists aren't sorted. The correction
-# list in the input HTML should be surrounded by the following markers on lines
-# by themselves:
-#
-# <!--mergecorrhtml:BEGIN-->
-# [list goes here]
-# <!--mergecorrhtml:END-->
-#
-# Typical usage would be in concert with corrtohtml and sortcorrhtml:
-#
-# corrtohtml <correctionFile> | sortcorrhtml | mergecorrhtml -b <book> <html>
-#
-# Output will appear on standard out which would usually be redirected to file.
-#
-################################################################################
-
-use strict;
-
-# Name used as the prefix of every diagnostic and at the head of the usage text.
-my $programName = 'mergecorrhtml';
-
-# Usage summary: printed for --help and used as the die message when a
-# required command-line argument is missing.
-my $usage = join( "",
-    "$programName [options] inputHTML\n",
-    "\t-b bookcode\n",
-    "\t-u include unspecified book\n",
-    "\t-v verbose reporting\n",
-);
-
-# Patterns used for matching list entries; they are assigned further down,
-# after the command line has been processed.
-my( $htmlRegex, $corrRegex, $issueRegex, $markerRegex );
-
-################################################################################
-# Process command line
-
-# Option state; these defaults apply when the matching switch is absent.
-my $optsProcessed = 0;           # set once a non-option argument is seen
-my $inFile;                      # first non-option argument
-my $bookCode = "";               # argument of -b
-my $bookCodeReport = "";         # " [bookCode]" when -v is given, else empty
-my $includeUnspecifiedBook = 0;  # set by -u
-my $verbose = 0;                 # set by -v
-
-# Work through the leading switches. Each recognised switch is removed from
-# @ARGV; the first argument that is not a switch is left in place and ends
-# option processing.
-until( $optsProcessed or not @ARGV ) {
-    my $nextArg = $ARGV[ 0 ];
-
-    if( $nextArg eq "-b" ) {
-        shift @ARGV;
-        # -b needs a value; a missing (or false) value is a usage error.
-        $bookCode = shift @ARGV or die $usage;
-    }
-    elsif( $nextArg eq "-u" ) {
-        shift @ARGV;
-        $includeUnspecifiedBook = 1;
-    }
-    elsif( $nextArg eq "-v" ) {
-        shift @ARGV;
-        $verbose = 1;
-    }
-    elsif( $nextArg eq "--help" ) {
-        shift @ARGV;
-        print $usage and exit;
-    }
-    else {
-        $optsProcessed = 1;
-    }
-}
-
-# Diagnostics carry the book code only in verbose mode.
-$bookCodeReport = " [$bookCode]" if( $verbose );
-
-# The input HTML file name is mandatory.
-$inFile = shift @ARGV or die $usage;
-
-# Tail of an entry's link target: everything up to the '#', then the issue
-# number, which is captured.
-$issueRegex = qr{[^#]+?(?:#([[:digit:]]+))};
-
-# Start of an entry as it appears in the input HTML. The empty second group
-# is never used; it only keeps the capture numbering identical to the
-# correction patterns below.
-$htmlRegex = qr{^(<div.*?>)()<a(.*?)href="};
-
-# Start of an entry in the incoming correction list.
-if( $bookCode eq "" ) {
-    # No book code requested: corrections look like plain HTML entries.
-    $corrRegex = $htmlRegex;
-}
-else {
-    # The book code comes straight from the command line, so it is quoted
-    # with \Q...\E to make any regex metacharacters in it match literally.
-    my $bookTag = qr{<!--[[:space:]]*\Q$bookCode\E[[:space:]]*-->};
-
-    # With -u the book-code comment is optional, so entries without one are
-    # accepted as well; otherwise the comment is required.
-    $corrRegex = $includeUnspecifiedBook
-        ? qr{^(<div.*?>)($bookTag)?<a(.*?)href="}
-        : qr{^(<div.*?>)($bookTag)<a(.*?)href="};
-}
-
-# A section heading line.
-$markerRegex = qr{^<div[[:space:]]+?class="section".*$};
-
-################################################################################
-# Read in HTML into which we're merging and correction HTML
-
-# Read the whole HTML file. The three-argument open with a lexical handle
-# treats $inFile purely as a file name, so characters such as a leading '>'
-# or a trailing '|' cannot alter how the file is opened.
-open( my $inHandle, '<', $inFile ) or die( "Error ($programName)$bookCodeReport: unable to open \"$inFile\" for read: $!\n" );
-my @lines = <$inHandle>;
-close $inHandle;
-
-#### Consume preamble
-
-# Everything before the BEGIN marker is echoed unchanged, followed by the
-# marker line itself (when there is one).
-my $beginMarker = qr{^[[:space:]]*<!--mergecorrhtml:BEGIN-->[[:space:]]*$};
-while( @lines && $lines[ 0 ] !~ $beginMarker ) {
-    print shift @lines;
-}
-print shift @lines if( @lines );
-
-my @inHTML;
-
-#### Get good stuff
-
-# Collect the list entries up to (not including) the END marker. Section
-# heading lines and blank lines are dropped; any other line is fatal.
-my $endMarker = qr{^[[:space:]]*<!--mergecorrhtml:END-->[[:space:]]*$};
-while( @lines && $lines[ 0 ] !~ $endMarker ) {
-    if( $lines[ 0 ] =~ m/$htmlRegex/ ) {
-        push( @inHTML, shift @lines );
-    }
-    elsif( $lines[ 0 ] =~ m/$markerRegex/ || $lines[ 0 ] =~ m/^[[:space:]]*$/ ) {
-        shift @lines;
-    }
-    else {
-        die( "Error ($programName)$bookCodeReport: unrecognized input HTML: " . $lines[ 0 ] . "\n" );
-    }
-}
-
-# Read the corrections from the remaining command-line files (or standard
-# input), keeping only the lines that look like correction entries.
-my @inCorr;
-while( my $corr = <> ) {
-    push( @inCorr, $corr ) if( $corr =~ m{$corrRegex} );
-}
-
-################################################################################
-# Merge!
-
-my @sectSortOrder = getSectSortOrder();
-
-# Walk the sections in their fixed order, emitting a heading for each one
-# followed by its existing entries (with matching corrections folded in) and
-# then any corrections for that section that matched no existing entry.
-for my $section ( @sectSortOrder ) {
-    my $issue;   # issue number of the most recently handled entry in this section
-    print qq{<div class="section"><a name="$section">$section</a></div>\n};
-
-    EXISTING:
-    while( @inHTML ) {
-        last EXISTING unless $inHTML[ 0 ] =~ m/$htmlRegex$section\.htm${issueRegex}/;
-        $issue = $4;
-
-        # Fold in every leading correction that refers to this same issue.
-        # The match and the comparison share one condition so that $4 is the
-        # issue number captured from the correction line.
-        INCOMING:
-        while( @inCorr ) {
-            last INCOMING unless $inCorr[ 0 ] =~ m/$corrRegex$section\.htm${issueRegex}/ && $issue eq $4;
-
-            my $incoming = shift @inCorr;
-            unless( $incoming =~ m{^.+?:[[:space:]]*<div[^>]+?class="[^"]*cm} ) {
-                warn( "Warning ($programName)$bookCodeReport: discarding data in issue comment: $incoming" );
-            }
-
-            # Strip the comment blocks off the correction one at a time and
-            # collect them; whatever else the correction holds is dropped.
-            my $comments = "";
-            while( $incoming =~ s{^.*?(<div[^>]+?class="[^"]*cm[^>]+>.*?</div>)}{} ) {
-                $comments .= $1;
-            }
-
-            # Insert the collected comments just before the closing tag of
-            # the existing entry.
-            $inHTML[ 0 ] =~ s{</div>$}{$comments</div>};
-        }
-
-        print shift @inHTML;
-    }
-
-    # Corrections left for this section are new issues: drop the book-code
-    # comment, number them after the last issue seen, and emit them.
-    while( @inCorr && $inCorr[ 0 ] =~ m/$corrRegex$section\.htm/ ) {
-        my $newEntry = shift @inCorr;
-        $newEntry =~ s{$corrRegex}{$1<a$3href="};
-        ++$issue;
-        $newEntry =~ s{#:}{#$issue:};
-        print $newEntry;
-    }
-}
-
-################################################################################
-# Print the remainder of the input HTML and corrections
-
-# Anything still queued was not consumed by the merge. Warn about the first
-# leftover entry of each list, then dump that list: existing HTML entries
-# first, corrections second.
-for my $leftover ( [ "input HTML", \@inHTML ], [ "input corrections", \@inCorr ] ) {
-    my( $what, $entries ) = @$leftover;
-    next unless @$entries;
-    warn( "Warning ($programName)$bookCodeReport: $what probably out of order\n\tor unrecognized section--error near:\n\t" . $entries->[ 0 ] . "\n" );
-    print @$entries;
-}
-
-# Finally, the rest of the input HTML that was not consumed earlier.
-print @lines;
-
-
-################################################################################
-################################################################################
-# Subroutines
-
-# getSectSortOrder
-#
-# Returns the section names in the order in which they are to be processed:
-# the front matter, part1 with sect1..sect199, part2 with sect200..sect400,
-# ill1..ill20, and finally the back matter. Takes no arguments.
-sub getSectSortOrder {
-    my @frontMatter = qw{
-        _unknown toc title dedicate acknwldg coming tssf gamerulz discplnz
-        powers equipmnt cmbtrulz lorecrcl levels imprvdsc kaiwisdm sage
-        numbered
-    };
-    my @backMatter = qw{
-        passing map action crsumary crtable random errata footnotz illstrat
-        license
-    };
-
-    my @order = (
-        @frontMatter,
-        'part1', ( map { "sect$_" } 1 .. 199 ),
-        'part2', ( map { "sect$_" } 200 .. 400 ),
-        ( map { "ill$_" } 1 .. 20 ),
-        @backMatter,
-    );
-
-    # Returned as a slice rather than a bare array: in scalar context a slice
-    # yields its last element, exactly as a literal word list does.
-    return @order[ 0 .. $#order ];
-}