View file File name : pdf-debug.pl Content :#!/usr/bin/perl use strict; use warnings; # VERSION use PDF::API2::Basic::PDF::File; my $file = shift(@ARGV); unless ($file) { print "Usage: $0 <file.pdf>\n\nDisplays information about a PDF file.\n"; exit; } my $pdf = PDF::API2::Basic::PDF::File->open($file) or die "Unable to open $file: $!"; my $command = shift(@ARGV); unless ($command) { print "PDF Version: " . $pdf->{' version'} . "\n"; print "XRef Table: " . $pdf->{' xref_position'} . "\n"; print "Info: " . _obj_reference($pdf->{'Info'}) . "\n" if $pdf->{'Info'}; print "Root: " . _obj_reference($pdf->{'Root'}) . "\n" if $pdf->{'Root'}; print "\n"; print "To view an object:\n"; print "$0 <file.pdf> obj <id> [generation]\n"; print "\n"; print "To view a cross-reference dictionary (with optional offset in bytes):\n"; print "$0 <file.pdf> xref [offset]\n"; print "\n"; } elsif ($command eq 'xref') { my $location = shift(@ARGV); $location = $pdf->{' xref_position'} unless defined $location; my $object = $pdf->readxrtr($location); print "XRef at $location\n"; print '--------' . ('-' x length($location)) . "\n"; _print_obj($object); } elsif ($command eq 'obj') { my $id = shift(@ARGV); die "Missing required object number" unless $id and $id =~ /^[0-9]+$/; my $generation = shift(@ARGV) || 0; my $location = $pdf->locate_obj($id, $generation); my $object = $pdf->read_objnum($id, $generation); print "Object $id"; unless (ref($location)) { print " (file position $location)\n"; } else { my ($obj_num, $obj_idx) = @$location; print " (object stream $obj_num index $obj_idx)\n"; } print '-------' . ('-' x length($id)) . "\n"; unless ($object) { print "[Unable to read object]\n"; } else { _print_obj($object); } } sub _print_obj { my $object = shift(); if ($object->isa('PDF::API2::Basic::PDF::Dict')) { print _obj_dictionary($object); } elsif ($object->isa('PDF::API2::Basic::PDF::Array')) { print _obj_array($object) . "\n"; } elsif ($object->isa('PDF::API2::Basic::PDF::Name') or $object->isa('PDF::API2::Basic::PDF::Number') or $object->isa('PDF::API2::Basic::PDF::String')) { if ($object->val() =~ /^[[:print:]]+$/) { print $object->val() . "\n"; } else { print $object->as_pdf() . "\n"; } } elsif ($object->isa('PDF::API2::Basic::PDF::Null')) { print "<Null>\n" } else { print "[" . ref($object) . "]\n"; } if ($object->{' stream'}) { print "\n"; print "Stream\n"; print "------\n"; eval { $object->read_stream(1) }; if ($@) { print "[Stream could not be read or decoded]\n"; } elsif ($ENV{'FORCE'} or $object->{' stream'} =~ /^[[:print:]\s]*$/) { print $object->{' stream'} . "\n"; } else { print "[Stream contains non-printable characters. Set environment FORCE=1 to show the stream anyway.]\n"; } } } sub _obj_reference { my $object = shift(); return '<Object ' . $object->{' objnum'} . ($object->{' objgen'} ? ' ' . $object->{' objgen'} : '') . '>'; } sub _obj_dictionary { my $object = shift(); my $indent = shift() || 0; my $data = {}; foreach my $key (keys %$object) { next if $key =~ /^ /; if (ref($object->{$key})) { if ($object->{$key}->isa('PDF::API2::Basic::PDF::Array')) { $data->{$key} = _obj_array($object->{$key}, $indent + 1); chomp $data->{$key}; } elsif ($object->{$key}->isa('PDF::API2::Basic::PDF::Dict')) { if ($object->{$key}->{' objnum'}) { $data->{$key} = '<Object ' . $object->{$key}->{' objnum'} . ($object->{$key}->{' objgen'} ? ' ' . $object->{$key}->{' objgen'} : '') . '>'; } else { unless (scalar grep { $_ !~ /^ / } keys %{$object->{$key}}) { $data->{$key} = '<Empty Dictionary>'; } else { $data->{$key} = "\n" . _obj_dictionary($object->{$key}, $indent + 1); chomp $data->{$key}; } } } elsif ($object->{$key}->isa('PDF::API2::Basic::PDF::Name') or $object->{$key}->isa('PDF::API2::Basic::PDF::Number') or $object->{$key}->isa('PDF::API2::Basic::PDF::String')) { if ($object->{$key}->val() =~ /^[[:print:]]+$/) { $data->{$key} = $object->{$key}->val(); } else { $data->{$key} = $object->{$key}->as_pdf(); } } elsif ($object->{$key}->isa('PDF::API2::Basic::PDF::Null')) { $data->{$key} = '<Null>'; } elsif ($object->{$key}->isa('PDF::API2::Basic::PDF::Objind') and $object->{$key}->{' objnum'}) { $data->{$key} = '<Object ' . $object->{$key}->{' objnum'} . ($object->{$key}->{' objgen'} ? ' ' . $object->{$key}->{' objgen'} : '') . '>'; } else { $data->{$key} = '[' . ref($object->{$key}) . ']'; } } else { $data->{$key} = $object->{$key}; } } my $longest_key = 0; foreach my $key (keys %$data) { next if $data->{$key} =~ /^\n/; $longest_key = length($key) if length($key) > $longest_key; } $longest_key++; my $value = ''; my $cr = sub { return 1 if substr($data->{$_[0]}, 0, 1) eq "\n"; return -1; }; foreach my $key (sort { &$cr($a) <=> &$cr($b) or $a cmp $b } keys %$data) { if ($indent) { $value .= ' ' x ($indent * 4); } $value .= sprintf("%-${longest_key}s ", $key . ':') . $data->{$key} . "\n"; } return $value; } sub _obj_array { my $object = shift(); my $indent = shift() || 0; return '[ ]' unless scalar $object->elements(); my @elements; my $is_complex = 0; foreach my $element ($object->elements()) { unless (ref($element)) { push @elements, $element; } else { if ($element->isa('PDF::API2::Basic::PDF::Array')) { push @elements, _obj_array($element, $indent + 1); } elsif ($element->isa('PDF::API2::Basic::PDF::Dict')) { if ($element->{' objnum'}) { push @elements, '<Object ' . $element->{' objnum'} . ($element->{' objgen'} ? ' ' . $element->{' objgen'} : '') . '>'; } else { unless (scalar grep { $_ !~ /^ / } keys %$element) { push @elements, "<Empty Dictionary>"; } else { $is_complex = 1; push @elements, "Dictionary: \n" . _obj_dictionary($element, $indent + 1); chomp $elements[-1]; } } } elsif ($element->isa('PDF::API2::Basic::PDF::Name') or $element->isa('PDF::API2::Basic::PDF::Number') or $element->isa('PDF::API2::Basic::PDF::String')) { if ($element->val() =~ /^[[:print:]]+$/) { push @elements, $element->val(); } else { push @elements, $element->as_pdf(); } } elsif ($element->isa('PDF::API2::Basic::PDF::Null')) { push @elements, '<Null>'; } elsif ($element->isa('PDF::API2::Basic::PDF::Objind') and $element->{' objnum'}) { push @elements, '<Object ' . $element->{' objnum'} . ($element->{' objgen'} ? ' ' . $element->{' objgen'} : '') . '>'; } else { push @elements, '[' . ref($element) . ']'; } } } my $value; unless ($is_complex) { $value = '[ ' . join(' ', @elements) . ' ]'; } else { $value = "\n"; foreach my $element (@elements) { $value .= ' ' x ($indent * 4) if $indent; $value .= '- ' . $element . "\n"; } } return $value; }