#!/usr/bin/perl

# Program Name:     PDF Metadata Editor
# Program URL:      http://www.arilabs.com/software/pdfmeta/pdfmeta.pl
# Filename:         pdfmeta.pl
# Version:          1.4
# Last Modified:    2007-01-05
# Author:           Brian High <bkh AT arilabs DOT com>
# Copyright:        Analytical Resources, Inc. (2005)
# License:          GNU GPL version 2 or greater.  See LICENSE below.
# Requires:         pdftk version 1.12 or greater, Perl/Tk
#                   pdftk must be in your environment's PATH
# Tested under:     Mandrake 10.1.0 (KDE 3.2.3-99, Perl 5.8.5-3, Perl-Tk 
#                   804.027-2), Debian unstable (Kanotix 2005-01, KDE 3.3.2-1, 
#                   Perl 5.8.4-6, Perl-Tk 800.025-2), Win2K Pro, and 
#                   WinXP Pro (SP2) with ActivePerl 5.8.6.811.
# Known Issues:     (1)  Under Windows XP Pro SP2 and ActivePerl 5.8.6.811,
#                   Windows will not let me drop files onto Perl script icons,
#                   so you can use the drag-and-drop built into the script
#                   to select the PDF file (by dropping the file onto the 
#                   listbox widget) or you can simply run the script
#                   from the command line.  This issue is a function of
#                   how Windows works and may be addressed by some sort
#                   or "registry hack"[1] or "power toy".  Anyway, if you
#                   really want to be able to drop PDFs right onto your
#                   script icon, you can simply create a DOS batch file
#                   or WSH/VBS script which will run pdfmeta.pl.  Example:
#                   Type this one-liner into a file <pdfmeta.bat>:
#                      @perl pdfmeta.pl %1
#                   And save the file in the same folder as pdftk.exe and
#                   pdfmeta.pl.  Then make a shortcut to pdfmeta.bat and
#                   place that shortcut on your desktop.  Modify the shortcut
#                   properties so that the console window it brings up will
#                   be "minimized".  
#                      Right-Click -> Properties -> Run: Minimized
#                   Rename the shortcut: PDF Metadata Editor
#                   (2)  While the drag-and-drop (XDND, remote) works fine
#                   under Mandrake's KDE, it does not under Debian's (Kanotix) 
#                   KDE.  This may have something to do with window manager
#                   settings.  I just get a cursor of a circle with a line
#                   through it.  I will have to look into this...
#                                      
#                   [1] For a discussion of your registry hacking options, see:
#                   http://www.perlmonks.org/?node=122205
#
# Acknowledgements: A. Thanks to 'Anonymous' who posted a tip for allowing.
#                   data entry from Japanese and other languages.  This
#                   change was made 2007-01-05 in version 1.4.  See lines 
#                   containing the strings: "use Encode" and
#                   "Encode::FB_HTMLCREF".
#                   B. Thanks also to 'morganise-IT' who told me about an
#                   alternative "freeware" product for Win32 which does
#                   about the same thing as this Perl script.  See:
#                   http://www.bureausoft.com/products.html and look for
#                   "PDF Info".  I have not tried it, but it looks good.
#                   NOTE: These posts were found on the AccessPDF website: 
#                   http://www.accesspdf.com/article.php/20050529160835361

# This program provides a simple and limited graphical interface for
# pdftk.  It only provides the functionality to modify PDF metadata 
# (document properties) fields.

# Usage:    [ perl ] pdfmeta.pl [ <filename> ]
#
# You can either run from the command line, with or without a filename,
# or you can drag a file onto the script icon, if your desktop supports it,
# or you can execute the script by clicking the icon and you will be 
# offered a drag and drop interface.

# NOTE:     You must have write permissions for the directory which
#           contains the original PDF file, as well as for the original
#           PDF file itself.
           
# NOTE:     Modify the @fieldnames array to use a different set of document
#           properties (metadata fields).  Use only standard PDF field names:
#           Title, Author, Subject, Keywords, Creator, Producer, CreationDate,
#           ModDate, and Trapped.

# PDFTK can be found here: http://www.accesspdf.com/pdftk/
# See also: http://hacks.oreilly.com/pub/h/2422

# Here is the pdftk man page section on the update_info feature:
#
#   update_info <info data filename | - | PROMPT>
#     Changes  the  metadata  stored in a single PDF's
#     Info dictionary to match the  input  data  file.
#     The  input data file uses the same syntax as the
#     output from dump_data. This does not change  the
#     metadata  stored  in the PDF's XMP stream, if it
#     has one. For example:
#
#       pdftk in.pdf update_info in.info output out.pdf

# Also, the author of pdftk (Sid Steward) has this to say about the 
# XMP stream:
#
#     PDFs store this metadata is two places: the Info dictionary and
#     the XMP (RDF/XML) stream. Pdftk updates only the Info dictionary, 
#     but newer versions of Acrobat/Reader defer to the XMP stream.
#
#     I am currently working on new features for updating both the Info 
#     dictionary and the XMP stream.
#
#     One workaround might be to remove the PDF's XMP stream. You can do 
#     this using pdftk, but it also removes bookmarks and other PDF 
#     features.  Run:
#
#       pdftk mydoc.xmp.pdf cat output mydoc.no_xmp.pdf
#
#     to burn of the XMP stream. Then maybe the viewer will fall back to 
#     the Info dictionary with your updated data.
# 
# ( From: http://www.accesspdf.com/comment.php?mode=view&cid=153 )

# ====================================================================
# LICENSE: GNU GPL v2 or greater:  http://www.gnu.org/licenses/gpl.txt
# ====================================================================

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

use strict;
use warnings;
use Encode;
use File::Basename;
use File::Spec::Functions;
use File::Copy;
use POSIX;
use Tk;
use Tk::DropSite;
use constant SUCCESS   => 1;
use constant FAILURE   => 0;
use constant TRUE      => 1;
use constant FALSE     => 0;
use constant PROG_NAME => "PDF Metadata Editor";
use constant PROG_VERS => "1.4";
use constant FILE_SUFF => "-meta";

my (%fields, $err_msg, $main);
my ($input_fn, $output_fn, $meta_fn);

#----------------------------------------------------------------------
# Configuration
#----------------------------------------------------------------------

# PDFTK command to use.  Include explicit path if necessary.
my $pdftk = "pdftk";

# You may modify the @fieldnames array to use a different set of document
# properties (metadata fields).  Use only standard PDF field names:
# Title, Author, Subject, Keywords, Creator, Producer, CreationDate, 
# ModDate, and Trapped.  The order of field names in this array is the 
# same as the order of the fields as listed on the user interface.
my @fieldnames = qw( Title Subject Author Keywords );

#----------------------------------------------------------------------
# Main Routine
#----------------------------------------------------------------------

&create_main_window();
&set_fonts();
&create_field_hash();
&get_input_file();
&MainLoop();

#----------------------------------------------------------------------
# Subroutines
#----------------------------------------------------------------------

sub create_main_window {
    $main = MainWindow->new();
    $main->title( PROG_NAME . " " . PROG_VERS );
}

sub set_fonts {
    my $font_family = 'Helvetica';
    my $large_size = 12;
    my $small_size = 10;

    $main->fontCreate( 'title',  
                       -size     => $large_size, 
                       -weight   => 'bold', 
                       -family   => $font_family,
                     );
    $main->fontCreate( 'header', 
                       -size     => $large_size, 
                       -family   => $font_family,
                     );
    $main->fontCreate( 'label',  
                       -size     => $small_size, 
                       -weight   => 'bold', 
                       -family   => $font_family,
                     );
    $main->fontCreate( 'button', 
                       -size     => $small_size, 
                       -family   => $font_family,
                     );
    $main->fontCreate( 'input',  
                       -size     => $small_size, 
                       -family   => $font_family,
                     );
}

sub create_field_hash {
    # Create hash to store metadata fields and values
    %fields = ();
    foreach my $field ( @fieldnames ) {
        $fields{$field} = '';
    }
}

sub get_input_file {
    # If a filename was given as an argument, use it
    if ( $ARGV[0] ) {
        $input_fn = $ARGV[0];
        &get_metadata() && &complete_gui();
    }
    else {
        # Otherwise offer drag and drop interface
        &create_drop_widgets();
    }
}

sub create_drop_widgets {
    my $drop_label = $main->Label ( 
                 -text         => "Drag your PDF file into the box below:", 
                 -font         => 'title',
                 )
             ->pack (
                 -ipadx        => 12, 
                 -ipady        => 4, 
                 -padx         => 8, 
                 -pady         => 8,
                 );

    # Define a DropSite (source side) for Drag and Drop functionality
    my $drop = $main->Scrolled ( 'Listbox',
	         -scrollbars   => "osoe", 
                 -height       => 1,
                 )
	     ->pack (
                 -pady         => 8,
                 );

    # Tell Tk that $drop should accept drops.
    # When dropping occurs, execute the accept_drop callback.
    $drop->DropSite (
         -dropcommand => [\&accept_drop, $drop],
         -droptypes   => ( $^O eq 'MSWin32' ? 'Win32' : 'XDND' )
    );
}

sub accept_drop {
    my( $widget, $selection ) = @_;
    eval {
        $input_fn = $widget->SelectionGet (
         -selection => $selection, 'STRING' );
        $input_fn =~ s/^file:(.*)/$1/;
    };
    if ( defined $input_fn ) {
	  $widget->insert( 0, $input_fn );
    }

    # After the file is dropped, hide the widgets, and present new widgets
    &clean_gui(); 
    &get_metadata() && &complete_gui();
}

sub clean_gui {
    # Remove (hide) widgets on the form, if any
    my @w = $main->packSlaves;
    foreach (@w) { $_->packForget; }
}
    
sub complete_gui {
    &clean_gui();

    # Finish defining MainWindow attributes and add widgets
    $main->Label ( 
                 -justify      => 'left', 
                 -text         => "Filename: \n$input_fn", 
                 -font         => 'label',
                 )
            ->pack ( 
                 -anchor       => 'w', 
                 -padx         => 8, 
                 -pady         => 8,
                 );
    foreach my $field ( @fieldnames ) {
        $main->Label ( 
                 -justify      => 'left', 
                 -text         => "$field: ", 
                 -font         => 'label',
                 )
            ->pack ( 
                 -anchor       => 'w', 
                 -padx         => 8,
                 );
        $main->Entry ( 
                 -textvariable => \$fields{$field}, 
                 -font         => 'input',
                 )
            ->pack ( 
                 -fill         => 'x', 
                 -padx         => 8,
                 );
    }
    $main->Button ( 
                 -text         => "Save Changes and Exit", 
                 -font         => 'button', 
                 -command      => sub { &save_and_exit(); },
                 )
            ->pack ( 
                 -side         => 'left', 
                 -ipadx        => 12, 
                 -ipady        => 4, 
                 -pady         => 8, 
                 -expand       => TRUE,
                 );
    $main->Button ( 
                 -text         => "Close", 
                 -font         => 'button',
                 -command      => sub { exit },
                 )
            ->pack ( 
                 -side         => 'right', 
                 -ipadx        => 12, 
                 -ipady        => 4, 
                 -pady         => 8, 
                 -expand       => TRUE,
                 );
}

sub get_metadata {
    my ( $input_fn_base, $input_fn_path, $input_fn_type );
    $err_msg = "Input file must be a (single) PDF.  Please try again.";

    # If more than one file is selected, then show abort error
    if ( defined( $output_fn ) ) {
	undef $output_fn;    
        &show_msg() && &create_drop_widgets() && return FAILURE; 
    }

    # Parse the file path, abort if not a PDF, create new file names
    ($input_fn_base, $input_fn_path, $input_fn_type) = 
        fileparse($input_fn, qr{\.pdf}i);
    $input_fn_type =~ /\.pdf/i || &abort_me() && return FAILURE;
    $output_fn = $input_fn_path . $input_fn_base . FILE_SUFF 
        . $input_fn_type;
    $meta_fn = $input_fn_path . $input_fn_base . '.mta';

    # Check to make sure we can read/write to files and directories
    $err_msg = "Cannot read from and/or write to input file!";
    (-r $input_fn && -w $input_fn) || &abort_me() && return FAILURE;
    $err_msg = "Cannot write to $input_fn_path directory!\n\n" . 
        "You must place the original file in a writable directory\n" .
        "before running this program.";
    -w $input_fn_path || &abort_me() && return FAILURE;

    # Dump the PDF's metadata to an ASCII text file and check for errors
    &dump_meta_data() || &abort_me() && return FAILURE;
    &check_for_dict() || &abort_me() && return FAILURE;
    
    # Read metadata text file into %fields hash
    $err_msg = "Error opening temporary metadata file for reading!";
    open ( METADATA, "<", $meta_fn ) 
        || &abort_me() && return FAILURE;
    while ( <METADATA> ) { 
        foreach my $field ( @fieldnames ) {  
            if ( /^InfoKey: $field$/ ) {
                $_ = <METADATA>;
                chomp;
                s/^InfoValue: (.*)$/$1/;
                $fields{$field} = $_;
            }
        }
    }
    &close_and_delete_metadata_file();
}

sub check_for_dict {
    my $no_dict = FALSE;

    # Read metadata text file to check for "no dictionary" error
    $err_msg = "Error opening temporary metadata file for reading!";
    open ( METADATA, "<", $meta_fn ) || return FAILURE;
    while ( <METADATA> ) { 
        if ( /no info dictionary found/ ) {
	   $no_dict = TRUE;
	   last;
        }
    }
    close METADATA;

    # If there was no info dictionary, then create one and save metadata
    if ( $no_dict ) {
        # Delete metadata file
        unlink $meta_fn;

        # Use PDFTK to add a new info dictionary using the 'cat' feature
        &cat_pdf() || return FAILURE;

        # Move repaired file to orig. file, get metadata, then store in file
        my $cmd = "$pdftk \"$input_fn\" dump_data > \"$meta_fn\" 2>&1"; 
        $err_msg = "Error running pdftk dump_data command!";
        move ( $output_fn, $input_fn ) && &dump_meta_data() || return FAILURE;
    }
    return SUCCESS;
}

sub dump_meta_data {
    my $cmd = "$pdftk \"$input_fn\" dump_data > \"$meta_fn\" 2>&1"; 
    $err_msg = "Error running dump_data command!\n\n";
    system ( $cmd ) == 0 || ( &report_metadata_errors() && return FAILURE );
    return SUCCESS
}

sub report_metadata_errors {
    open ( METADATA, "<", $meta_fn ) || &abort_me() && return FAILURE; 
    local $/ = undef;
    $err_msg .= <METADATA>;
    &close_and_delete_metadata_file();
}

sub close_and_delete_metadata_file {
    close METADATA;
    unlink $meta_fn;
}

sub cat_pdf {
    # Get current metadata info from input file and store in text file
    $err_msg = "Error running pdftk cat command!";
    system ( "$pdftk \"$input_fn\" cat output \"$output_fn\" " 
        . "dont_ask 2>&1" ) == 0 || return FAILURE; 
    return SUCCESS;
}
       
sub abort_me {
    &clean_gui();

    # Show an error message and an exit button
    $main->Label ( 
                 -text         => $err_msg, 
                 -font         => 'header',
                 )
             ->pack (
                 -ipadx        => 12, 
                 -ipady        => 4, 
                 -padx         => 8, 
                 -pady         => 8,
                 );
    $main->Button ( 
                 -text         => "Exit",
                 -command      => sub { exit }, 
                 -font         => 'button',
                 )
	     ->pack ( 
                 -ipadx        => 12, 
                 -ipady        => 4, 
                 -padx         => 8, 
                 -pady         => 8,
                 );
}

sub show_msg {
    &clean_gui();

    # Show an error message and an exit button
    $main->Label ( 
                 -text         => $err_msg, 
                 -font         => 'header',
                 )
             ->pack (
                 -ipadx        => 12, 
                 -ipady        => 4, 
                 -padx         => 8, 
                 -pady         => 8,
                 );
}

sub save_and_exit {
    # Save new metadata to metadata file
    $err_msg = "Error opening temporary metadata file for writing!";
    open ( METADATA, ">", $meta_fn ) || &abort_me() && return FAILURE;
    foreach my $field ( @fieldnames ) {  
        print METADATA "InfoKey: $field\n";
        print METADATA "InfoValue: $fields{$field}\n";
        print METADATA "InfoValue: ", 
            encode('us-ascii', $fields{$field}, Encode::FB_HTMLCREF), "\n";
    }

    # Write new metadata into new PDF file
    system ( "$pdftk \"$input_fn\" " . 
             "update_info \"$meta_fn\" " . 
             "output \"$output_fn\" dont_ask" );

    # Replace orig. PDF with new one, so that the old one has new metadata
    $err_msg = "Error updating PDF!  Changes are in:\n" . $output_fn;
    move ( $output_fn, $input_fn ) || &abort_me() && return FAILURE;

    # Delete metadata file.  (The delete does not work without opening first.)
    $err_msg = "Error opening temporary metadata file for reading!";
    open ( METADATA, "<", $meta_fn ) || &abort_me() && return FAILURE;
    &close_and_delete_metadata_file();
    exit;
}

__END__
