6 use File::Path qw(mkpath);
# Name of this program; it prefixes every diagnostic message below.
8 my $PROGRAM_NAME = 'gbtoepub';
# Usage summary that is appended to command-line error messages.
9 my $USAGE = "$PROGRAM_NAME [options] book-code\n\t--meta=[metadata file]\n\t--xml=[book XML]\n\t--epub-xsl=[XSL transformation]\n\t--language=[language area of input data (output determined by meta file)]\n\t--font-files=[font-files]\n\t--no-validate\n\t--verbose\n";
# Separator used wherever a path is assembled by string concatenation.
11 my $FILENAME_SEPARATOR = '/';
# Locate the external tools by running `which` through the shell. qx{}
# returns the command's standard output verbatim, so each value still ends
# with a newline at this point.
# NOTE(review): presumably the values are chomped in the check loop below,
# which is not fully visible here - confirm before using them as paths.
13 my $RXP = qx{which rxp};
14 my $CP = qx{which cp};
15 my $MV = qx{which mv};
16 my $TAR = qx{which tar};
17 my $ZIP = qx{which zip};
18 my $BZIP2 = qx{which bzip2};
19 my $JAVA = qx{which java};
# Xalan is addressed by a fixed jar path instead of a `which` lookup.
20 my $XALAN_JAR = '/usr/share/java/xalan2.jar';
21 my $RM = qx{which rm};
22 my $CHMOD = qx{which chmod};
34 # Check that all the binaries are where we want them
# The Xalan jar path is queued together with the executables.
37 push @BINARIES, ($RXP, $CP, $MV, $TAR, $ZIP, $BZIP2, $JAVA, $XALAN_JAR, $RM, $CHMOD);
# Fatal message naming the entry currently held in $_.
# NOTE(review): the loop and the test that guard this die are not visible.
41 die "$PROGRAM_NAME: Cannot find binary '".$_."'. Please install it.\n";
# Fixed file and directory names that make up the epub container.
47 my $EPUB_MIMETYPE = 'application/epub+zip';
48 my $MIMETYPE_FILE = 'mimetype';
49 my $CONTAINER_FILE = 'container.xml';
50 my $OEBPS_DIR = 'OEBPS';
51 my $META_INF_DIR = 'META-INF';
52 my $NCX_FILE = 'toc.ncx';
# Extension (without the dot) passed to the stylesheets as "xhtml-ext" and
# appended to the spine idrefs.
53 my $XHTML_EXT = 'html';
# Base URI from which each book's unique identifier is built.
55 my $PROJECT_AON_URI = 'http://www.projectaon.org';
# Default XSL stylesheets for the NCX, the XHTML content, the OPF metadata
# and the OPF spine. Paths are relative.
# NOTE(review): presumably relative to the directory the program is run
# from - confirm.
63 my $ncxXSL = 'common/xsl/epub-ncx.xsl';
64 my $epubXSL = 'common/xsl/epub-xhtml.xsl';
65 my $metadataXSL = 'common/xsl/epub-opf-metadata.xsl';
66 my $spineXSL = 'common/xsl/epub-opf-spine.xsl';
# Default font directory: "souvenir" below the user's home directory.
67 my $fontFiles = "$ENV{'HOME'}${FILENAME_SEPARATOR}souvenir";
73 ### read command line options
# Consume @ARGV one item at a time; $#ARGV is -1 once the array is empty.
75 while( $#ARGV > -1 ) {
76 my $cmdLineItem = shift @ARGV;
# Every --name=value pattern requires a non-empty value and captures it.
# NOTE(review): the branch bodies are not visible in this view; presumably
# each one stores the capture in the matching setting - confirm.
77 if( $cmdLineItem =~ /^--meta=(.+)$/ ) {
80 elsif( $cmdLineItem =~ /^--xml=(.+)$/ ) {
83 elsif( $cmdLineItem =~ /^--epub-xsl=(.+)$/ ) {
86 elsif( $cmdLineItem =~ /^--language=(.+)$/ ) {
# The two flag patterns are anchored at the start only, so any argument that
# merely begins with "--verbose" or "--no-validate" matches as well.
89 elsif( $cmdLineItem =~ /^--verbose/ ) {
92 elsif( $cmdLineItem =~ /^--no-validate/ ) {
95 elsif( $cmdLineItem =~ /^--font-files=(.+)$/ ) {
# NOTE(review): presumably the fall-through branch - an argument that matched
# no option is taken as the book code. Confirm against the full file.
99 $bookCode = $cmdLineItem;
# A book code is mandatory.
103 if( $bookCode eq '' ) {
104 die "$PROGRAM_NAME: Unspecified book code\n$USAGE";
# Unset metadata and XML locations are derived from $language and $bookCode.
106 if( $metaFile eq '' ) { $metaFile = "$language/.publisher/rules/epub"; }
107 if( $bookXML eq '' ) { $bookXML = "$language/xml/$bookCode.xml"; }
108 if( $epubXSL eq '' ) {
109 die "$PROGRAM_NAME: Unspecified XSL transformation file\n$USAGE";
112 ### validate book XML
# Validate only when validation was not disabled and the rxp path exists.
# system() is called in list form (no shell) and returns 0 on success; any
# other status aborts the run.
114 if( (not $noValidate) && -e $RXP ) {
115 system( $RXP, '-Vs', $bookXML ) == 0
116 or die "$PROGRAM_NAME: XML validation failed\n";
# Validation was switched off explicitly: warn and carry on.
118 elsif( $noValidate ) {
119 warn "$PROGRAM_NAME: XML validation skipped - validate before publication\n";
# Warning for the case in which the validator is not available.
122 warn "$PROGRAM_NAME: XML validator not installed - validate before publication\n";
125 ### read in metadata file
# The metadata file must exist, be a plain file and be readable.
127 unless( -e $metaFile && -f $metaFile && -r $metaFile ) {
128 die qq{$PROGRAM_NAME: Improper metadata file "$metaFile"\n};
131 open( META, '<', $metaFile ) or
132 die qq{$PROGRAM_NAME: Unable to open metadata file "$metaFile": $!\n};
# Accumulate the file in $meta, dropping every line whose first non-blank
# character is '#'.
135 while( my $line = <META> ) {
136 $meta .= $line if $line !~ /^[[:space:]]*#/;
140 ### interpret rules from metadata
141 my $rulesString = '';
# Find "<book code> { ... }" at the start of a line and capture the text
# between the braces.
# NOTE(review): $bookCode is interpolated into the pattern unquoted, so any
# regex metacharacter in it is interpreted - consider \Q$bookCode\E.
142 if( $meta =~ /^[[:space:]]*$bookCode[[:space:]]*{([^}]*)}/sm ) {
146 die "$PROGRAM_NAME: Book code ($bookCode) not found in metadata file or invalid file syntax\n";
# Rules are separated by semicolons; white space around them is discarded.
149 my @rules = split( /[[:space:]\n]*;[[:space:]\n]*/, $rulesString );
151 foreach my $rule (@rules) {
# A rule reads "name: value". The name capture is greedy up to the first
# colon, so white space directly in front of the colon stays in the key.
152 if( $rule =~ /[[:space:]]*([^:]+)[[:space:]]*:[[:space:]]*(.+)$/s ) {
153 $rulesHash{ $1 } = $2;
156 die "$PROGRAM_NAME: Unrecognized rule syntax:\n$rule\n";
# 'book-series' and 'csst' are mandatory rules.
160 unless( defined $rulesHash{'book-series'} ) {
161 die "$PROGRAM_NAME: no book series set\n";
163 unless( defined $rulesHash{'csst'} ) {
164 die "$PROGRAM_NAME: metadata file leaves CSS templates unspecified\n";
# Series long name and series number, later handed to the OPF metadata
# stylesheet as the book_series / book_series_index parameters.
167 my $SERIES = get_series($rulesHash{'book-series'}) ;
168 my $SERIES_NUMBER = get_series_number($bookCode);
171 ### create output directories
# Top-level output directory, starting <language>/epub/<book-series>/.
# NOTE(review): the final component of this expression is not visible in
# this view.
174 $outPath{'top'} = $rulesHash{'language'} . $FILENAME_SEPARATOR .
175 'epub' . $FILENAME_SEPARATOR .
176 $rulesHash{'book-series'} . $FILENAME_SEPARATOR .
# A mimetype file in the top directory triggers removal of everything below
# that directory. The path is interpolated into a shell command line.
179 if( -e "$outPath{'top'}$FILENAME_SEPARATOR$MIMETYPE_FILE" ) {
180 print qx{$RM -r $outPath{'top'}$FILENAME_SEPARATOR*};
183 $outPath{'meta-inf'} = $outPath{'top'} . $FILENAME_SEPARATOR . $META_INF_DIR;
184 $outPath{'oebps'} = $outPath{'top'} . $FILENAME_SEPARATOR . $OEBPS_DIR;
# Create every directory listed in %outPath that is not there yet.
186 foreach my $directory (keys(%outPath)) {
187 unless( -e $outPath{$directory} && -d $outPath{$directory} ) {
188 mkpath $outPath{$directory}
189 or die "$PROGRAM_NAME: Unknown error creating output directory " .
190 "\"$outPath{$directory}\"\n";
194 ### create content files
196 # the location of this tempfile also controls where the xhtml will go
197 my $tempFile = "$outPath{'oebps'}${FILENAME_SEPARATOR}foo.xml";
# Run Xalan's command-line XSLT processor on the book XML with the XHTML
# stylesheet; whatever the command prints is echoed to standard output.
198 print qx{$JAVA -classpath "$XALAN_JAR" org.apache.xalan.xslt.Process -IN "$bookXML" -XSL "$epubXSL" -OUT "$tempFile" -PARAM xhtml-ext ".$XHTML_EXT" -PARAM use-illustrators "$rulesHash{'use-illustrators'}" -PARAM language "$rulesHash{'language'}"}; #" <- comment to unconfuse VIM syntax hilighting (ugh)
# The -OUT file itself is deleted straight away.
# NOTE(review): presumably the stylesheet writes the real XHTML documents
# next to it as a side effect - confirm in the stylesheet.
199 print qx{$RM $tempFile};
# 'images' is a colon-separated list of directories; the contents of each
# one are copied into the OEBPS directory through the shell.
201 foreach my $imagePath (split( /:/, $rulesHash{'images'} )) {
202 unless( -e $imagePath && -d $imagePath ) {
203 die "$PROGRAM_NAME: Image path ($imagePath) does not exist or is not a directory\n";
205 print qx{$CP $imagePath${FILENAME_SEPARATOR}* $outPath{'oebps'}};
208 ### create the CSS stylesheet
# 'csst' is a colon-separated list of CSS template files.
210 foreach my $cssTemplate (split( /:/, $rulesHash{'csst'} )) {
# Capture the last path component without its final "t"; that capture is
# the name of the stylesheet written to the OEBPS directory.
# NOTE(review): the match result is not tested, so a template name that does
# not end in "t" leaves $1 at whatever an earlier match set.
211 $cssTemplate =~ m/([^${FILENAME_SEPARATOR}]+)t$/;
212 my $templateFile = $1;
213 open( TEMPLATE, '<', $cssTemplate )
214 or die "$PROGRAM_NAME: Unable to open CSS template file ($cssTemplate): $!\n";
216 my $stylesFile = "$outPath{'oebps'}$FILENAME_SEPARATOR$templateFile";
217 open( STYLESHEET, '>', $stylesFile )
218 or die "$PROGRAM_NAME: Unable to open stylesheet file ($stylesFile) for writing: $!\n";
# Copy the template line by line, replacing each %%name%% placeholder with
# the rule of the same name until the line holds no placeholder any more.
# NOTE(review): the assignment of $name is not visible here; presumably it
# takes the capture of the pattern in the inner while condition.
220 while( my $templateLine = <TEMPLATE> ) {
221 while( $templateLine =~ /%%([^%[:space:]]+)%%/ ) {
223 $templateLine =~ s/%%${name}%%/$rulesHash{$name}/g;
225 print STYLESHEET $templateLine;
231 ### copy the font files
# The font directory must exist; only its *.otf files are copied, through
# the shell, into the OEBPS directory.
233 unless( -e $fontFiles && -d $fontFiles ) {
234 die "$PROGRAM_NAME: font files directory does not exist or is not a directory \"$fontFiles\": $!\n";
236 print qx{$CP $fontFiles${FILENAME_SEPARATOR}*.otf $outPath{'oebps'}};
# Identifier of the OPF package element, and the book URI that is handed to
# the stylesheets as "unique-identifier".
240 my $uniqueID = "opf-$bookCode";
241 my $bookUniqueURI = "$PROJECT_AON_URI/$language/epub/" .
242 "$rulesHash{'book-series'}/$bookCode/";
# The NCX is the standard output of the Xalan run with the NCX stylesheet,
# captured with qx{} and written to toc.ncx in the OEBPS directory.
244 my $ncxFile = $outPath{'oebps'} . $FILENAME_SEPARATOR . $NCX_FILE;
245 open( NCXFILE, '>', $ncxFile ) or
246 die "$PROGRAM_NAME: unable to open NCX file for writing " .
248 print NCXFILE qx{$JAVA -classpath "$XALAN_JAR" org.apache.xalan.xslt.Process -IN "$bookXML" -XSL "$ncxXSL" -PARAM xhtml-ext ".$XHTML_EXT" -PARAM unique-identifier "$bookUniqueURI" -PARAM language "$rulesHash{'language'}"}; #" comment to unconfuse VIM syntax highlighting
251 ### write mimetype file
# The file sits in the top directory and receives the media type string
# exactly as stored in $EPUB_MIMETYPE; no newline is appended.
253 my $mimeFile = $outPath{'top'} . $FILENAME_SEPARATOR . $MIMETYPE_FILE;
254 open( MIMETYPE, '>', $mimeFile ) or
255 die "$PROGRAM_NAME: unable to open mimetype file for writing " .
257 print MIMETYPE $EPUB_MIMETYPE;
261 ### write OPF Root file
262 # All content files must be created prior to creating the OPF root file
263 # with its manifest of content files.
# The OPF file is named after the book code and lives in the OEBPS
# directory. It is written in four steps: the package header (here-document),
# the metadata produced by the metadata stylesheet, a manifest listing the
# OEBPS directory, and the spine produced by the spine stylesheet.
265 my $opfFileName = "$bookCode.opf";
266 my $opfFile = "$outPath{'oebps'}$FILENAME_SEPARATOR$opfFileName";
267 open( OPF, '>', $opfFile ) or
268 die "$PROGRAM_NAME: unable to open OPF file for writing " .
271 print OPF <<END_OPF_HEADER;
272 <?xml version="1.0"?>
273 <package version="2.0"
274 xmlns="http://www.idpf.org/2007/opf"
275 unique-identifier="$uniqueID">
281 my $metadata = qx{$JAVA -classpath "$XALAN_JAR" org.apache.xalan.xslt.Process -IN "$bookXML" -XSL "$metadataXSL" -PARAM opf-id "$uniqueID" -PARAM unique-identifier "$bookUniqueURI" -PARAM language "$rulesHash{'language'}" -PARAM book_series "$SERIES" -PARAM book_series_index "$SERIES_NUMBER"}; #" comment to unconfuse VIM syntax hilighting
# Cosmetic layout only: indent the captured metadata and start every <dc:...>
# element and the closing </metadata> tag on a line of its own.
282 $metadata = " $metadata";
283 $metadata =~ s|(<dc:)|\n $1|g;
284 $metadata =~ s|(</metadata>)|\n $1|g;
286 print OPF "$metadata\n\n";
288 ## write manifest data
289 # assuming a flat directory structure within the OEBPS directory
291 print OPF " <manifest>\n";
293 opendir( my $content_dir, $outPath{'oebps'} )
294 or die "$PROGRAM_NAME: unable to read OEBPS directory " .
295 "\"$outPath{'oebps'}\": $!\n";
# One <item> per directory entry; "." and ".." and any *.opf file (the
# package file itself) are left out. The id comes from make_id() and the
# media type from get_mime_type(), both applied to the file name.
297 while( my $content_file = readdir $content_dir ) {
298 next if $content_file eq '.';
299 next if $content_file eq '..';
300 next if $content_file =~ m/\.opf$/i;
301 print OPF qq{ <item id="};
302 print OPF make_id( $content_file );
303 print OPF qq{" href="$content_file" media-type="};
304 print OPF get_mime_type( $content_file );
308 closedir $content_dir;
310 print OPF " </manifest>\n\n";
# The spine stylesheet receives the manifest id of the NCX file as "toc-id".
314 my $ncxID = make_id( $NCX_FILE );
315 my $spine = qx{$JAVA -classpath "$XALAN_JAR" org.apache.xalan.xslt.Process -IN "$bookXML" -XSL "$spineXSL" -PARAM toc-id "$ncxID"};
# Append ".<xhtml extension>" to every idref.
# NOTE(review): presumably so the idrefs equal the manifest ids, which are
# derived from complete file names - confirm against the spine stylesheet.
317 $spine =~ s/idref="([^"]*)"/idref="$1.$XHTML_EXT"/g;
# Cosmetic layout only: one <itemref> per line, </spine> on its own line.
319 $spine =~ s|(<itemref)|\n $1|g;
320 $spine =~ s|(</spine>)|\n $1|g;
322 print OPF "$spine\n\n";
324 # TODO: write (optional) guide data here
325 #print OPF " <guide>\n";
326 #print OPF " </guide>\n</package>";
328 print OPF "</package>";
332 ### write container.xml
# container.xml is written to the META-INF directory; its <rootfile> element
# points at the OPF file inside the OEBPS directory.
334 my $containerFile = "$outPath{'meta-inf'}$FILENAME_SEPARATOR$CONTAINER_FILE";
335 open( CONTAINER, '>', $containerFile ) or
336 die "$PROGRAM_NAME: unable to open container file for writing " .
337 "\"$containerFile\"\n";
338 print CONTAINER <<END_CONTAINER;
339 <?xml version="1.0"?>
340 <container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
342 <rootfile full-path="$OEBPS_DIR/$opfFileName"
343 media-type="application/oebps-package+xml" />
349 ### compress epub contents
350 # complies with Open Container Format 2.0.1
351 # http://idpf.org/epub/20/spec/OCF_2.0.1_draft.doc
# Work inside the top directory so the archive holds relative paths.
# NOTE(review): neither the chdir nor the three zip runs have their result
# checked, so a failure here goes unnoticed.
353 chdir $outPath{'top'};
# The mimetype file is added first, stored uncompressed (-0) and without
# extra file attributes (-X); the two directories are then added
# recursively (-r). -q keeps zip quiet.
355 system( $ZIP, '-0Xq', "$bookCode.epub", $MIMETYPE_FILE );
356 system( $ZIP, '-rq', "$bookCode.epub", $META_INF_DIR );
357 system( $ZIP, '-rq', "$bookCode.epub", $OEBPS_DIR );
361 ################################################################################
363 ################################################################################
# Derive a manifest id from a file name.
#
# Takes the name as its only argument and returns the id: the name gets a
# leading underscore when it starts with '-', '.' or a digit, and every
# character in the range \x80-\xff is replaced by an underscore.
sub make_id {
    my ( $name ) = ( @_ );

    my $prefix = ( $name =~ m/^[-.0-9]/ ) ? '_' : '';
    my $id     = $prefix . $name;
    $id =~ s/[\x80-\xff]/_/g;

    return $id;
}
# Return the media type for a content file.
# relies on valid file name extensions
#
# Takes a file name; the extension is compared case-insensitively against
# the table below and the first matching entry wins. Names that match no
# entry yield 'application/x-unrecognized-mime'.
sub get_mime_type {
    my ( $file ) = ( @_ );

    my @media_types = (
        [ qr/\.x?html?$/i, 'application/xhtml+xml' ],
        [ qr/\.css$/i,     'text/css' ],
        [ qr/\.png$/i,     'image/png' ],
        [ qr/\.jpe?g$/i,   'image/jpeg' ],
        [ qr/\.svg$/i,     'image/svg+xml' ],
        [ qr/\.gif$/i,     'image/gif' ],
        [ qr/\.ncx$/i,     'application/x-dtbncx+xml' ],
        [ qr/\.otf$/i,     'application/x-font-opentype' ],
    );

    foreach my $entry (@media_types) {
        my ( $pattern, $type ) = @{$entry};
        return $type if $file =~ $pattern;
    }

    return 'application/x-unrecognized-mime';
}
406 # Check whether a file is empty; if it is, abort, as this is an indication
407 # that the previous processing step went wrong
411 print STDERR "There was an error generating $file (empty file produced)";
418 #unless( $bookXML =~ m{^([-\w\@./]+)$} ) {
419 # die "$PROGRAM_NAME: bad book XML filename \"$bookXML\"\n";
423 #unless( -e $bookXML && -f $bookXML && -r $bookXML ) {
424 # die "$PROGRAM_NAME: XML does not exist or is not readable \"$bookXML\"\n";
428 # system( $RXP, '-Vs', $bookXML ) == 0
429 # or die "$PROGRAM_NAME: XML validation failed\n";
432 # warn "$PROGRAM_NAME: XML Validator not installed - validate before publication\n";
435 #unless( defined $rulesHash{'language'} ) { die "$PROGRAM_NAME: Metadata file leaves language unspecified\n"; }
436 #unless( defined $rulesHash{'book-series'} ) { die "$PROGRAM_NAME: Metadata file leaves book series unspecified\n"; }
437 #unless( defined $rulesHash{'images'} ) { die "$PROGRAM_NAME: Metadata file leaves image directories unspecified\n"; }
438 #unless( defined $rulesHash{'csst'} ) { die "$PROGRAM_NAME: Metadata file leaves CSS templates unspecified\n"; }
441 #my $bookPath = "$outPath${FILENAME_SEPARATOR}";
442 #print qx{$RM ${bookPath}*} if -e $bookPath."/toc.htm";
443 #print qx{$JAVA -classpath "$XALAN_JAR" org.apache.xalan.xslt.Process -IN "$bookXML" -XSL "$xhtmlXSL" -OUT "${bookPath}foo.xml" -PARAM background-color "$rulesHash{'background-color'}" -PARAM text-color "$rulesHash{'text-color'}" -PARAM link-color "$rulesHash{'link-color'}" -PARAM use-illustrators "$rulesHash{'use-illustrators'}" -PARAM language "$rulesHash{'language'}"};
444 #print qx{$RM ${bookPath}foo.xml};
448 #print qx{$ZIP -8 -q ${bookCode}.zip ${bookPath}*};
449 #print qx{$MV ${bookCode}* $bookPath};
451 #print "Success\n" if $verbose;
# Determine series long name by the series acronym
#
# Takes the series acronym and returns the long name. An acronym that is not
# in the table produces a warning on STDERR and the placeholder
# "[undefined]".
sub get_series {
    my ($series) = @_;

    my %long_name_of = (
        "lw" => "Lone Wolf",
        "ls" => "Lobo Solitario",
        "gs" => "Grey Star the Wizard",
        "fw" => "Freeway Warrior",
    );

    return $long_name_of{$series} if exists $long_name_of{$series};

    print STDERR "WARN: Undefined series. Short name given: '$series'\n";
    return "[undefined]";
}
# Determine the series number based on book code
#
# Takes the book code and returns its two leading digits. A code that does
# not start with two digits produces a warning on STDERR and the placeholder
# "xx".
sub get_series_number {
    my ($bookCode) = @_;

    return $1 if $bookCode =~ /^(\d\d)/;

    print STDERR "WARN: Undefined series number. Book code is '$bookCode'.\n";
    return "xx";
}
485 # Determine the book title by reading the book meta information
# NOTE(review): the sub header and its argument unpacking are not visible in
# this view; $book is presumably the path of the book XML file.
488 my $title = ""; my $line = "";
# Read the first 100 lines of $book through the external `head` command.
# This is a two-argument pipe open, so $book reaches the shell unquoted.
489 open (BOOK, "head -100 $book | ") || die ("Could not read $book: $!");
# Stop at end of input or as soon as a title has been found.
490 while ($title eq "" && ( $line = <BOOK> ) ) {
# The opening and closing tag must be on the same line for this to match.
492 if ( $line =~ /<title>(.*?)<\/title>/ ) {
# Nothing found: warn and fall back to a placeholder.
498 if ( $title eq "" ) {
499 print STDERR "WARN: Cannot find title for book '$book'\n";
500 $title = "[Undefined]";
# The <ch.NAME/> character elements in the title are converted on return.
503 return convert_entities($title);
506 # Determine the book author by reading the book meta information
# NOTE(review): the sub header, the declarations of $author, $line and
# $find_line, and the return statement are not visible in this view.
# Read the first 100 lines of $book through the external `head` command.
# This is a two-argument pipe open, so $book reaches the shell unquoted.
511 open (BOOK, "head -100 $book |") || die ("Could not read $book: $!");
# Stop at end of input or as soon as an author has been found.
514 while ($author eq "" && ( $line = <BOOK> ) ) {
# Inside a <creator class="medium"> element the author is taken from a
# <line> element that opens and closes on the same input line.
516 if ( $find_line == 1 && $line =~ /<line>(.*?)<\/line>/ ) {
# $find_line is switched on by a <creator class="medium"> line and off by a
# </creator> line; when both occur on one line it ends up off.
519 $find_line = 1 if ( $line =~ /<creator class="medium">/ );
520 $find_line = 0 if ( $line =~ /<\/creator>/ );
# NOTE(review): this pattern opens with <creator ...> but closes with
# </title>; it looks like a copy/paste slip for </creator> - confirm.
521 if ( $line =~ /<creator class="author">(.*?)<\/title>/ ) {
# Nothing found: warn and fall back to a placeholder.
527 if ( $author eq "" ) {
528 print STDERR "WARN: Cannot find author for book '$book'\n";
529 $author = "[Undefined]";
536 # Determine the book illustrator by reading the book meta information
# NOTE(review): the argument unpacking, the declarations of $line and
# $find_line, and the return statement are not visible in this view.
537 sub find_illustrator {
539 my $illustrator = "";
# Read the first 100 lines of $book through the external `head` command.
# This is a two-argument pipe open, so $book reaches the shell unquoted.
541 open (BOOK, "head -100 $book | ") || die ("Could not read $book: $!");
# Stop at end of input or as soon as an illustrator has been found.
544 while ($illustrator eq "" && ( $line = <BOOK> ) ) {
# Inside a <creator class="medium"> element the name is taken from a <line>
# element that starts with the English text "Illustrated by ".
546 if ( $find_line == 1 && $line =~ /<line>Illustrated by (.*?)<\/line>/ ) {
# $find_line is switched on by a <creator class="medium"> line and off by a
# </creator> line; when both occur on one line it ends up off.
549 $find_line = 1 if ( $line =~ /<creator class="medium">/ );
550 $find_line = 0 if ( $line =~ /<\/creator>/ );
# NOTE(review): this pattern opens with <creator ...> but closes with
# </title>; it looks like a copy/paste slip for </creator> - confirm.
551 if ( $line =~ /<creator class="illustrator">(.*?)<\/title>/ ) {
# Nothing found: warn and fall back to a placeholder.
557 if ( $illustrator eq "" ) {
558 print STDERR "WARN: Cannot find illustrator for book '$book'\n";
559 $illustrator = "[Undefined]";
# Prefix the name with a phrase chosen by $language.
# NOTE(review): the Spanish phrase is spelled "Illustrado" here; the usual
# spelling is "Ilustrado" - confirm before changing the emitted text.
561 if ( $language eq "en" ) {
562 $illustrator = "Illustrated by ".$illustrator;
563 } elsif ( $language eq "es" ) {
564 $illustrator = "Illustrado por ".$illustrator;
# Convert character entities to their corresponding values.
#
# The book XML writes special characters as empty elements of the form
# <ch.NAME/>. This sub takes one string and returns a copy in which every
# element whose NAME is listed below has been replaced by its character.
# Names are case-sensitive, and elements with any other name are returned
# unchanged.
sub convert_entities {
    my ($text) = @_;

    my %char_for = (
        'apos'      => "'",
        'nbsp'      => ' ',
        'plusmn'    => '+-',
        'aacute'    => 'á',
        'eacute'    => 'é',
        'iacute'    => 'í',
        'oacute'    => 'ó',
        'uacute'    => 'ú',
        'ntilde'    => 'ñ',
        'Aacute'    => 'Á',
        'Eacute'    => 'É',
        'Iacute'    => 'Í',
        'Oacute'    => 'Ó',
        'Uacute'    => 'Ú',
        'auml'      => 'ä',
        'euml'      => 'ë',
        'iuml'      => 'ï',
        'ouml'      => 'ö',
        'uuml'      => 'ü',
        'Ntilde'    => 'Ñ',
        'acute'     => '´',
        'iexcl'     => '¡',
        'iquest'    => '¿',
        'laquo'     => '«',
        'raquo'     => '»',
        'ampersand' => '&',
    );

    # One pass over the text. The dot after "ch" is matched literally, so
    # only genuine <ch.NAME/> elements are touched; a pattern such as
    # /<ch.apos\/>/ would also rewrite "<ch-apos/>" or "<chXapos/>".
    my $names = join '|', map { quotemeta } sort keys %char_for;
    $text =~ s{<ch\.($names)/>}{$char_for{$1}}g;

    return $text;
}
604 # Quote metacharacters for shell use