diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..4d807b8 --- /dev/null +++ b/.gitignore @@ -0,0 +1,6 @@ +__pycache__ +.cache +.coverage + +# Packaging files +*.egg-info diff --git a/.hgignore b/.hgignore deleted file mode 100644 index 15fd641..0000000 --- a/.hgignore +++ /dev/null @@ -1,14 +0,0 @@ -syntax: glob -.bzr/* -.hg/* -dist/* -build/* -*.py[oc] -*.kpf -*.jpg -iptcinfo.state -iptcinfo.*.* -*.log -MANIFEST -*~ -test/rudolph_vogt diff --git a/.pypirc b/.pypirc deleted file mode 100755 index e1b3b59..0000000 --- a/.pypirc +++ /dev/null @@ -1,3 +0,0 @@ -[server-login] -username:gthomas -password:goody8 \ No newline at end of file diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..db3047a --- /dev/null +++ b/.travis.yml @@ -0,0 +1,7 @@ +language: python +python: "3.6" +cache: pip +install: + - pip install pipenv + - pipenv install --dev --system +script: make test diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..ae07440 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,62 @@ +1.9.5-8: https://bitbucket.org/gthomas/iptcinfo/issue/4/file-permissions-for-changed-files-are-not - copy original file's permission bits on save/saveAs + +1.9.5-7: https://bitbucket.org/gthomas/iptcinfo/issue/3/images-w-o-iptc-data-should-not-log-errors - have silencable parse errors. + +1.9.5-6: to have a nice new upload (seems easy_install grabs an old version). + +1.9.5-5: fix some issues with "super" + +1.9.5-3: use logging module. + +1.9.5-2: Emil Stenström pinpointed some bugs/misleading (un)comments + Also a new (mis)feature is implemented: if you don't specify inp_charset + (and the image misses such information, too) than no conversion is made + to unicode, everything stays bytestring! + This way you don't need to deal with charsets, BUT it is your risk to make + the modifications with the SAME charset as it is in the image! 
+ +1.9.5-1: getting in sync with the Perl version 1.9.5 + +1.9.2-rc8: + charset recognition loosened (failed with some image out of + Adobe Lightroom). + +1.9.2-rc7: NOT READY + IPTCInfo now accepts 'inp_charset' keyword for setting input charset. + +1.9.2-rc6: just PyLint-ed out some errors. + +1.9.2-rc5: Amos Latteier sent me a patch which relaxes the requirement for the + file arguments to be file objects (he uses this on jpeg files stored in + databases as strings). + It modifies the module in order to look for a read method on the file + object. If one exists it assumes the argument is a file object, otherwise it + assumes it's a filename. + +1.9.2-rc4: on Windows systems, tmpfile may not work correctly - now I use + cStringIO on file save (to save the file without truncating it on Exception). + +1.9.2-rc3: some little bug fixes, some safety enhancements (now iptcinfo.py + will overwrite the original image file (info.save()) only if everything goes + fine (so if an exception is thrown at writing, it won't cut your original + file)). + + This is a pre-release version: needs some testing, and has an as-yet-unfound + bug: some pictures can be enhanced with iptc data, and iptcinfo.py is able + to read them, but some other iptc data readers will choke on it. + +1.9.1: a first release with some basic encoding support + + The class IPTCInfo now has an inp_charset and an out_charset attribute - the + first is the read image's charset (defaults to the system default charset), + the second is the charset the writer will use (defaults to inp_charset). + + Reader will find the charset included in IPTC data (if any, defaults to the + system's default charset), and use it to decode to unicode strings. Writer will + write using IPTCInfo.out_charset (if it is not set, will not write charset + IPTC record). + + With this, it is possible to read and write i18n strings correctly. 
+ + I haven't tested this functionality thoroughly, and that little test was only + on my WinXP box only, with the only other IPTC reader: IrfanView. diff --git a/IPTCInfo-1.8.pm b/IPTCInfo-1.8.pm deleted file mode 100755 index a3be91d..0000000 --- a/IPTCInfo-1.8.pm +++ /dev/null @@ -1,1410 +0,0 @@ -# IPTCInfo: extractor for IPTC metadata embedded in images -# Copyright (C) 2000-2004 Josh Carter -# All rights reserved. -# -# This program is free software; you can redistribute it and/or modify -# it under the same terms as Perl itself. - -package Image::IPTCInfo; - -use vars qw($VERSION); -$VERSION = '1.8'; - -# -# Global vars -# -use vars ('%datasets', # master list of dataset id's - '%datanames', # reverse mapping (for saving) - '%listdatasets', # master list of repeating dataset id's - '%listdatanames', # reverse - ); - -# Debug off for production use -my $debugMode = 0; -my $error; - -##################################### -# These names match the codes defined in ITPC's IIM record 2. -# This hash is for non-repeating data items; repeating ones -# are in %listdatasets below. 
-%datasets = ( -# 0 => 'record version', # skip -- binary data - 5 => 'object name', - 7 => 'edit status', - 8 => 'editorial update', - 10 => 'urgency', - 12 => 'subject reference', - 15 => 'category', -# 20 => 'supplemental category', # in listdatasets (see below) - 22 => 'fixture identifier', -# 25 => 'keywords', # in listdatasets - 26 => 'content location code', - 27 => 'content location name', - 30 => 'release date', - 35 => 'release time', - 37 => 'expiration date', - 38 => 'expiration time', - 40 => 'special instructions', - 42 => 'action advised', - 45 => 'reference service', - 47 => 'reference date', - 50 => 'reference number', - 55 => 'date created', - 60 => 'time created', - 62 => 'digital creation date', - 63 => 'digital creation time', - 65 => 'originating program', - 70 => 'program version', - 75 => 'object cycle', - 80 => 'by-line', - 85 => 'by-line title', - 90 => 'city', - 92 => 'sub-location', - 95 => 'province/state', - 100 => 'country/primary location code', - 101 => 'country/primary location name', - 103 => 'original transmission reference', - 105 => 'headline', - 110 => 'credit', - 115 => 'source', - 116 => 'copyright notice', - 118 => 'contact', - 120 => 'caption/abstract', - 122 => 'writer/editor', -# 125 => 'rasterized caption', # unsupported (binary data) - 130 => 'image type', - 131 => 'image orientation', - 135 => 'language identifier', - 200 => 'custom1', # These are NOT STANDARD, but are used by - 201 => 'custom2', # Fotostation. Use at your own risk. They're - 202 => 'custom3', # here in case you need to store some special - 203 => 'custom4', # stuff, but note that other programs won't - 204 => 'custom5', # recognize them and may blow them away if - 205 => 'custom6', # you open and re-save the file. (Except with - 206 => 'custom7', # Fotostation, of course.) 
- 207 => 'custom8', - 208 => 'custom9', - 209 => 'custom10', - 210 => 'custom11', - 211 => 'custom12', - 212 => 'custom13', - 213 => 'custom14', - 214 => 'custom15', - 215 => 'custom16', - 216 => 'custom17', - 217 => 'custom18', - 218 => 'custom19', - 219 => 'custom20', - ); - -# this will get filled in if we save data back to file -%datanames = (); - -%listdatasets = ( - 20 => 'supplemental category', - 25 => 'keywords', - ); - -# this will get filled in if we save data back to file -%listdatanames = (); - -####################################################################### -# New, Save, Destroy, Error -####################################################################### - -# -# new -# -# $info = new IPTCInfo('image filename goes here') -# -# Returns iPTCInfo object filled with metadata from the given image -# file. File on disk will be closed, and changes made to the IPTCInfo -# object will *not* be flushed back to disk. -# -sub new -{ - my ($pkg, $filename, $force) = @_; - - # - # Open file and snarf data from it. - # - unless(open(FILE, $filename)) - { - $error = "Can't open file: $!"; Log($error); - return undef; - } - - binmode(FILE); - - my $datafound = ScanToFirstIMMTag(); - unless ($datafound || defined($force)) - { - $error = "No IPTC data found."; Log($error); - close(FILE); - return undef; - } - - my $self = bless - { - '_data' => {}, # empty hashes; wil be - '_listdata' => {}, # filled in CollectIIMInfo - '_filename' => $filename, - }, $pkg; - - # Do the real snarfing here - CollectIIMInfo($self) if $datafound; - - close(FILE); - - return $self; -} - -# -# create -# -# Like new, but forces an object to always be returned. This allows -# you to start adding stuff to files that don't have IPTC info and then -# save it. -# -sub create -{ - my ($pkg, $filename) = @_; - - return new($pkg, $filename, 'force'); -} - -# -# Save -# -# Saves JPEG with IPTC data back to the same file it came from. 
-# -sub Save -{ - my ($self, $options) = @_; - - return $self->SaveAs($self->{'_filename'}, $options); -} - -# -# Save -# -# Saves JPEG with IPTC data to a given file name. -# -sub SaveAs -{ - my ($self, $newfile, $options) = @_; - - # - # Open file and snarf data from it. - # - unless(open(FILE, $self->{'_filename'})) - { - $error = "Can't open file: $!"; Log($error); - return undef; - } - - binmode(FILE); - - unless (FileIsJPEG()) - { - $error = "Source file is not a JPEG; I can only save JPEGs. Sorry."; - Log($error); - return undef; - } - - my $ret = JPEGCollectFileParts($options); - - close(FILE); - - if ($ret == 0) - { - Log("collectfileparts failed"); - return undef; - } - - my ($start, $end, $adobe) = @$ret; - - if (defined($options) && defined($options->{'discardAdobeParts'})) - { - undef $adobe; - } - - # - # Open dest file and stuff data there - # - unless(open(FILE, '>' . $newfile)) - { - $error = "Can't open output file: $!"; Log($error); - return undef; - } - - binmode(FILE); - - print FILE $start; - print FILE $self->PhotoshopIIMBlock($adobe, $self->PackedIIMData()); - print FILE $end; - - close(FILE); - - return 1; -} - -# -# DESTROY -# -# Called when object is destroyed. No action necessary in this case. -# -sub DESTROY -{ - # no action necessary -} - -# -# Error -# -# Returns description of the last error. -# -sub Error -{ - return $error; -} - -####################################################################### -# Attributes for clients -####################################################################### - -# -# Attribute/SetAttribute -# -# Returns/Changes value of a given data item. -# -sub Attribute -{ - my ($self, $attribute) = @_; - - return $self->{_data}->{$attribute}; -} - -sub SetAttribute -{ - my ($self, $attribute, $newval) = @_; - - $self->{_data}->{$attribute} = $newval; -} - -# -# Keywords/Clear/Add -# -# Returns reference to a list of keywords/clears the keywords -# list/adds a keyword. 
-# -sub Keywords -{ - my $self = shift; - return $self->{_listdata}->{'keywords'}; -} - -sub ClearKeywords -{ - my $self = shift; - $self->{_listdata}->{'keywords'} = undef; -} - -sub AddKeyword -{ - my ($self, $add) = @_; - - $self->AddListData('keywords', $add); -} - -# -# SupplementalCategories/Clear/Add -# -# Returns reference to a list of supplemental categories. -# -sub SupplementalCategories -{ - my $self = shift; - return $self->{_listdata}->{'supplemental category'}; -} - -sub ClearSupplementalCategories -{ - my $self = shift; - $self->{_listdata}->{'supplemental category'} = undef; -} - -sub AddSupplementalCategories -{ - my ($self, $add) = @_; - - $self->AddListData('supplemental category', $add); -} - -sub AddListData -{ - my ($self, $list, $add) = @_; - - # did user pass in a list ref? - if (ref($add) eq 'ARRAY') - { - # yes, add list contents - push(@{$self->{_listdata}->{$list}}, @$add); - } - else - { - # no, just a literal item - push(@{$self->{_listdata}->{$list}}, $add); - } -} - -####################################################################### -# XML, SQL export -####################################################################### - -# -# ExportXML -# -# $xml = $info->ExportXML('entity-name', \%extra-data, -# 'optional output file name'); -# -# Exports XML containing all image metadata. Attribute names are -# translated into XML tags, making adjustments to spaces and slashes -# for compatibility. (Spaces become underbars, slashes become dashes.) -# Caller provides an entity name; all data will be contained within -# this entity. Caller optionally provides a reference to a hash of -# extra data. This will be output into the XML, too. Keys must be -# valid XML tag names. Optionally provide a filename, and the XML -# will be dumped into there. 
-# -sub ExportXML -{ - my ($self, $basetag, $extraRef, $filename) = @_; - my $out; - - $basetag = 'photo' unless length($basetag); - - $out .= "<$basetag>\n"; - - # dump extra info first, if any - foreach my $key (keys %$extraRef) - { - $out .= "\t<$key>" . $extraRef->{$key} . "\n"; - } - - # dump our stuff - foreach my $key (keys %{$self->{_data}}) - { - my $cleankey = $key; - $cleankey =~ s/ /_/g; - $cleankey =~ s/\//-/g; - - $out .= "\t<$cleankey>" . $self->{_data}->{$key} . "\n"; - } - - if (defined ($self->Keywords())) - { - # print keywords - $out .= "\t\n"; - - foreach my $keyword (@{$self->Keywords()}) - { - $out .= "\t\t$keyword\n"; - } - - $out .= "\t\n"; - } - - if (defined ($self->SupplementalCategories())) - { - # print supplemental categories - $out .= "\t\n"; - - foreach my $category (@{$self->SupplementalCategories()}) - { - $out .= "\t\t$category\n"; - } - - $out .= "\t\n"; - } - - # close base tag - $out .= "\n"; - - # export to file if caller asked for it. - if (length($filename)) - { - open(XMLOUT, ">$filename"); - print XMLOUT $out; - close(XMLOUT); - } - - return $out; -} - -# -# ExportSQL -# -# my %mappings = ( -# 'IPTC dataset name here' => 'your table column name here', -# 'caption/abstract' => 'caption', -# 'city' => 'city', -# 'province/state' => 'state); # etc etc etc. -# -# $statement = $info->ExportSQL('mytable', \%mappings, \%extra-data); -# -# Returns a SQL statement to insert into your given table name -# a set of values from the image. Caller passes in a reference to -# a hash which maps IPTC dataset names into column names for the -# database table. Optionally pass in a ref to a hash of extra data -# which will also be included in the insert statement. Keys in that -# hash must be valid column names. 
-# -sub ExportSQL -{ - my ($self, $tablename, $mappingsRef, $extraRef) = @_; - my ($statement, $columns, $values); - - return undef if (($tablename eq undef) || ($mappingsRef eq undef)); - - # start with extra data, if any - foreach my $column (keys %$extraRef) - { - my $value = $extraRef->{$column}; - $value =~ s/'/''/g; # escape single quotes - - $columns .= $column . ", "; - $values .= "\'$value\', "; - } - - # process our data - foreach my $attribute (keys %$mappingsRef) - { - my $value = $self->Attribute($attribute); - $value =~ s/'/''/g; # escape single quotes - - $columns .= $mappingsRef->{$attribute} . ", "; - $values .= "\'$value\', "; - } - - # must trim the trailing ", " from both - $columns =~ s/, $//; - $values =~ s/, $//; - - $statement = "INSERT INTO $tablename ($columns) VALUES ($values)"; - - return $statement; -} - -####################################################################### -# File parsing functions (private) -####################################################################### - -# -# ScanToFirstIMMTag -# -# Scans to first IIM Record 2 tag in the file. The will either use -# smart scanning for JPEGs or blind scanning for other file types. -# -sub ScanToFirstIMMTag -{ - if (FileIsJPEG()) - { - Log("File is JPEG, proceeding with JPEGScan"); - return JPEGScan(); - } - else - { - Log("File not a JPEG, trying BlindScan"); - return BlindScan(); - } -} - -# -# FileIsJPEG -# -# Checks to see if this file is a JPEG/JFIF or not. Will reset the -# file position back to 0 after it's done in either case. -# -sub FileIsJPEG -{ - # reset to beginning just in case - seek(FILE, 0, 0); - - if ($debugMode) - { - Log("Opening 16 bytes of file:\n"); - my $dump; - read (FILE, $dump, 16); - HexDump($dump); - seek(FILE, 0, 0); - } - - # check start of file marker - my ($ff, $soi); - read (FILE, $ff, 1) || goto notjpeg; - read (FILE, $soi, 1); - - goto notjpeg unless (ord($ff) == 0xff && ord($soi) == 0xd8); - - # now check for APP0 marker. 
I'll assume that anything with a SOI - # followed by APP0 is "close enough" for our purposes. (We're not - # dinking with image data, so anything following the JPEG tagging - # system should work.) - my ($app0, $len, $jpeg); - read (FILE, $ff, 1); - read (FILE, $app0, 1); - - goto notjpeg unless (ord($ff) == 0xff); - - # reset to beginning of file - seek(FILE, 0, 0); - return 1; - - notjpeg: - seek(FILE, 0, 0); - return 0; -} - -# -# JPEGScan -# -# Assuming the file is a JPEG (see above), this will scan through the -# markers looking for the APP13 marker, where IPTC/IIM data should be -# found. While this isn't a formally defined standard, all programs -# have (supposedly) adopted Adobe's technique of putting the data in -# APP13. -# -sub JPEGScan -{ - # Skip past start of file marker - my ($ff, $soi); - read (FILE, $ff, 1) || return 0; - read (FILE, $soi, 1); - - unless (ord($ff) == 0xff && ord($soi) == 0xd8) - { - $error = "JPEGScan: invalid start of file"; Log($error); - return 0; - } - - # Scan for the APP13 marker which will contain our IPTC info (I hope). - - my $marker = JPEGNextMarker(); - - while (ord($marker) != 0xed) - { - if (ord($marker) == 0) - { $error = "Marker scan failed"; Log($error); return 0; } - - if (ord($marker) == 0xd9) - { $error = "Marker scan hit end of image marker"; - Log($error); return 0; } - - if (ord($marker) == 0xda) - { $error = "Marker scan hit start of image data"; - Log($error); return 0; } - - if (JPEGSkipVariable() == 0) - { $error = "JPEGSkipVariable failed"; - Log($error); return 0; } - - $marker = JPEGNextMarker(); - } - - # If were's here, we must have found the right marker. Now - # BlindScan through the data. - return BlindScan(); -} - -# -# JPEGNextMarker -# -# Scans to the start of the next valid-looking marker. Return value is -# the marker id. -# -sub JPEGNextMarker -{ - my $byte; - - # Find 0xff byte. We should already be on it. 
- read (FILE, $byte, 1) || return 0; - while (ord($byte) != 0xff) - { - Log("JPEGNextMarker: warning: bogus stuff in JPEG file"); - read(FILE, $byte, 1) || return 0; - } - - # Now skip any extra 0xffs, which are valid padding. - do - { - read(FILE, $byte, 1) || return 0; - } while (ord($byte) == 0xff); - - # $byte should now contain the marker id. - Log("JPEGNextMarker: at marker " . unpack("H*", $byte)); - return $byte; -} - -# -# JPEGSkipVariable -# -# Skips variable-length section of JPEG block. Should always be called -# between calls to JPEGNextMarker to ensure JPEGNextMarker is at the -# start of data it can properly parse. -# -sub JPEGSkipVariable -{ - my $rSave = shift; - - # Get the marker parameter length count - my $length; - read(FILE, $length, 2) || return 0; - - ($length) = unpack("n", $length); - - Log("JPEG variable length: $length"); - - # Length includes itself, so must be at least 2 - if ($length < 2) - { - Log("JPEGSkipVariable: Erroneous JPEG marker length"); - return 0; - } - $length -= 2; - - # Skip remaining bytes - my $temp; - if (defined($rSave) || $debugMode) - { - unless (read(FILE, $temp, $length)) - { - Log("JPEGSkipVariable: read failed while skipping var data"); - return 0; - } - - # prints out a heck of a lot of stuff - # HexDump($temp); - } - else - { - # Just seek - unless(seek(FILE, $length, 1)) - { - Log("JPEGSkipVariable: read failed while skipping var data"); - return 0; - } - } - - $$rSave = $temp if defined($rSave); - - return 1; -} - -# -# BlindScan -# -# Scans blindly to first IIM Record 2 tag in the file. This method may -# or may not work on any arbitrary file type, but it doesn't hurt to -# check. We expect to see this tag within the first 8k of data. (This -# limit may need to be changed or eliminated depending on how other -# programs choose to store IIM.) 
-# -sub BlindScan -{ - my $offset = 0; - my $MAX = 8192; # keep within first 8192 bytes - # NOTE: this may need to change - - # start digging - while ($offset <= $MAX) - { - my $temp; - - unless (read(FILE, $temp, 1)) - { - Log("BlindScan: hit EOF while scanning"); - return 0; - } - - # look for tag identifier 0x1c - if (ord($temp) == 0x1c) - { - # if we found that, look for record 2, dataset 0 - # (record version number) - my ($record, $dataset); - read (FILE, $record, 1); - read (FILE, $dataset, 1); - - if (ord($record) == 2) - { - # found it. seek to start of this tag and return. - Log("BlindScan: found IIM start at offset $offset"); - seek(FILE, -3, 1); # seek rel to current position - return $offset; - } - else - { - # didn't find it. back up 2 to make up for - # those reads above. - seek(FILE, -2, 1); # seek rel to current position - } - } - - # no tag, keep scanning - $offset++; - } - - return 0; -} - -# -# CollectIIMInfo -# -# Assuming file is seeked to start of IIM data (using above), this -# reads all the data into our object's hashes -# -sub CollectIIMInfo -{ - my $self = shift; - - # NOTE: file should already be at the start of the first - # IPTC code: record 2, dataset 0. - - while (1) - { - my $header; - return unless read(FILE, $header, 5); - - ($tag, $record, $dataset, $length) = unpack("CCCn", $header); - - # bail if we're past end of IIM record 2 data - return unless ($tag == 0x1c) && ($record == 2); - - # print "tag : " . $tag . "\n"; - # print "record : " . $record . "\n"; - # print "dataset : " . $dataset . "\n"; - # print "length : " . $length . "\n"; - - my $value; - read(FILE, $value, $length); - - # try to extract first into _listdata (keywords, categories) - # and, if unsuccessful, into _data. Tags which are not in the - # current IIM spec (version 4) are currently discarded. 
- if (exists $listdatasets{$dataset}) - { - my $dataname = $listdatasets{$dataset}; - my $listref = $listdata{$dataname}; - - push(@{$self->{_listdata}->{$dataname}}, $value); - } - elsif (exists $datasets{$dataset}) - { - my $dataname = $datasets{$dataset}; - - $self->{_data}->{$dataname} = $value; - } - # else discard - } -} - -####################################################################### -# File Saving -####################################################################### - -# -# JPEGCollectFileParts -# -# Collects all pieces of the file except for the IPTC info that we'll -# replace when saving. Returns the stuff before the info, stuff after, -# and the contents of the Adobe Resource Block that the IPTC data goes -# in. Returns undef if a file parsing error occured. -# -sub JPEGCollectFileParts -{ - my ($options) = @_; - my ($start, $end, $adobeParts); - my $discardAppParts = 0; - - if (defined($options) && defined($options->{'discardAppParts'})) - { $discardAppParts = 1; } - - # Start at beginning of file - seek(FILE, 0, 0); - - # Skip past start of file marker - my ($ff, $soi); - read (FILE, $ff, 1) || return 0; - read (FILE, $soi, 1); - - unless (ord($ff) == 0xff && ord($soi) == 0xd8) - { - $error = "JPEGScan: invalid start of file"; Log($error); - return 0; - } - - # - # Begin building start of file - # - $start .= pack("CC", 0xff, 0xd8); - - # Manually insert APP0 if we're trashing application parts, since - # all JFIF format images should start with the version block. - if ($discardAppParts) - { - $start .= pack("CC", 0xff, 0xe0); - $start .= pack("n", 16); # length (including these 2 bytes) - $start .= "JFIF"; # format - $start .= pack("CC", 1, 2); # call it version 1.2 (current JFIF) - $start .= pack(C8, 0); # zero everything else - } - - # - # Now scan through all markers in file until we hit image data or - # IPTC stuff. 
- # - $marker = JPEGNextMarker(); - - while (1) - { - if (ord($marker) == 0) - { $error = "Marker scan failed"; Log($error); return 0; } - - # Check for end of image - if (ord($marker) == 0xd9) - { - Log("JPEGCollectFileParts: saw end of image marker"); - $end .= pack("CC", 0xff, ord($marker)); - goto doneScanning; - } - - # Check for start of compressed data - if (ord($marker) == 0xda) - { - Log("JPEGCollectFileParts: saw start of compressed data"); - $end .= pack("CC", 0xff, ord($marker)); - goto doneScanning; - } - - my $partdata; - if (JPEGSkipVariable(\$partdata) == 0) - { $error = "JPEGSkipVariable failed"; - Log($error); return 0; } - - # Take all parts aside from APP13, which we'll replace - # ourselves. - if ($discardAppParts && ord($marker) >= 0xe0 && ord($marker) <= 0xef) - { - # Skip all application markers, including Adobe parts - undef $adobeParts; - } - elsif (ord($marker) == 0xed) - { - # Collect the adobe stuff from part 13 - $adobeParts = CollectAdobeParts($partdata); - goto doneScanning; - } - else - { - # Append all other parts to start section - $start .= pack("CC", 0xff, ord($marker)); - $start .= pack("n", length($partdata) + 2); - $start .= $partdata; - } - - $marker = JPEGNextMarker(); - } - - doneScanning: - - # - # Append rest of file to $end - # - my $buffer; - - while (read(FILE, $buffer, 16384)) - { - $end .= $buffer; - } - - return [$start, $end, $adobeParts]; -} - -# -# CollectAdobeParts -# -# Part APP13 contains yet another markup format, one defined by Adobe. -# See "File Formats Specification" in the Photoshop SDK (avail from -# www.adobe.com). We must take everything but the IPTC data so that -# way we can write the file back without losing everything else -# Photoshop stuffed into the APP13 block. 
-# -sub CollectAdobeParts -{ - my ($data) = @_; - my $length = length($data); - my $offset = 0; - my $out; - - # Skip preamble - $offset = length('Photoshop 3.0 '); - - # Process everything - while ($offset < $length) - { - # Get OSType and ID - my ($ostype, $id1, $id2) = unpack("NCC", substr($data, $offset, 6)); - $offset += 6; - - # Get pascal string - my ($stringlen) = unpack("C", substr($data, $offset, 1)); - $offset += 1; - my $string = substr($data, $offset, $stringlen); - $offset += $stringlen; - # round up if odd - $offset++ if ($stringlen % 2 != 0); - # there should be a null if string len is 0 - $offset++ if ($stringlen == 0); - - # Get variable-size data - my ($size) = unpack("N", substr($data, $offset, 4)); - $offset += 4; - - my $var = substr($data, $offset, $size); - $offset += $size; - $offset++ if ($size % 2 != 0); # round up if odd - - # skip IIM data (0x0404), but write everything else out - unless ($id1 == 4 && $id2 == 4) - { - $out .= pack("NCC", $ostype, $id1, $id2); - $out .= pack("C", $stringlen); - $out .= $string; - $out .= pack("C", 0) if ($stringlen == 0 || - $stringlen % 2 != 0); - $out .= pack("N", $size); - $out .= $var; - $out .= pack("C", 0) if ($size % 2 != 0 && length($out) % 2 != 0); - } - } - - return $out; -} - -# -# PackedIIMData -# -# Assembles and returns our _data and _listdata into IIM format for -# embedding into an image. -# -sub PackedIIMData -{ - my $self = shift; - my $out; - - # First, we need to build a mapping of datanames to dataset - # numbers if we haven't already. 
- unless (scalar(keys %datanames)) - { - foreach my $dataset (keys %datasets) - { - my $dataname = $datasets{$dataset}; - $datanames{$dataname} = $dataset; - } - } - - # Ditto for the lists - unless (scalar(keys %listdatanames)) - { - foreach my $dataset (keys %listdatasets) - { - my $dataname = $listdatasets{$dataset}; - $listdatanames{$dataname} = $dataset; - } - } - - # Print record version - # tag - record - dataset - len (short) - 2 (short) - $out .= pack("CCCnn", 0x1c, 2, 0, 2, 2); - - # Iterate over data sets - foreach my $key (keys %{$self->{_data}}) - { - my $dataset = $datanames{$key}; - my $value = $self->{_data}->{$key}; - - if ($dataset == 0) - { Log("PackedIIMData: illegal dataname $key"); next; } - - my ($tag, $record) = (0x1c, 0x02); - - $out .= pack("CCCn", $tag, $record, $dataset, length($value)); - $out .= $value; - } - - # Do the same for list data sets - foreach my $key (keys %{$self->{_listdata}}) - { - my $dataset = $listdatanames{$key}; - - if ($dataset == 0) - { Log("PackedIIMData: illegal dataname $key"); next; } - - foreach my $value (@{$self->{_listdata}->{$key}}) - { - my ($tag, $record) = (0x1c, 0x02); - - $out .= pack("CCCn", $tag, $record, $dataset, length($value)); - $out .= $value; - } - } - - return $out; -} - -# -# PhotoshopIIMBlock -# -# Assembles the blob of Photoshop "resource data" that includes our -# fresh IIM data (from PackedIIMData) and the other Adobe parts we -# found in the file, if there were any. 
-# -sub PhotoshopIIMBlock -{ - my ($self, $otherparts, $data) = @_; - my $resourceBlock; - my $out; - - $resourceBlock .= "Photoshop 3.0"; - $resourceBlock .= pack("C", 0); - # Photoshop identifier - $resourceBlock .= "8BIM"; - # 0x0404 is IIM data, 00 is required empty string - $resourceBlock .= pack("CCCC", 0x04, 0x04, 0, 0); - # length of data as 32-bit, network-byte order - $resourceBlock .= pack("N", length($data)); - # Now tack data on there - $resourceBlock .= $data; - # Pad with a blank if not even size - $resourceBlock .= pack("C", 0) if (length($data) % 2 != 0); - # Finally tack on other data - $resourceBlock .= $otherparts if defined($otherparts); - - $out .= pack("CC", 0xff, 0xed); # JPEG start of block, APP13 - $out .= pack("n", length($resourceBlock) + 2); # length - $out .= $resourceBlock; - - return $out; -} - -####################################################################### -# Helpers, docs -####################################################################### - -# -# Log: just prints a message to STDERR if $debugMode is on. -# -sub Log -{ - if ($debugMode) - { my $message = shift; print STDERR "**IPTC** $message\n"; } -} - -# -# HexDump -# -# Very helpful when debugging. -# -sub HexDump -{ - my $dump = shift; - my $len = length($dump); - my $offset = 0; - my ($dcol1, $dcol2); - - while ($offset < $len) - { - my $temp = substr($dump, $offset++, 1); - - my $hex = unpack("H*", $temp); - $dcol1 .= " " . $hex; - if (ord($temp) >= 0x21 && ord($temp) <= 0x7e) - { $dcol2 .= " $temp"; } - else - { $dcol2 .= " ."; } - - if ($offset % 16 == 0) - { - print STDERR $dcol1 . " | " . $dcol2 . "\n"; - undef $dcol1; undef $dcol2; - } - } - - if (defined($dcol1) || defined($dcol2)) - { - print STDERR $dcol1 . " | " . $dcol2 . "\n"; - undef $dcol1; undef $dcol2; - } -} - -# -# JPEGDebugScan -# -# Also very helpful when debugging. 
-# -sub JPEGDebugScan -{ - my $filename = shift; - open(FILE, $filename) or die "Can't open $filename: $!"; - - # Skip past start of file marker - my ($ff, $soi); - read (FILE, $ff, 1) || return 0; - read (FILE, $soi, 1); - - unless (ord($ff) == 0xff && ord($soi) == 0xd8) - { - Log("JPEGScan: invalid start of file"); - goto done; - } - - # scan to 0xDA (start of scan), dumping the markers we see between - # here and there. - my $marker = JPEGNextMarker(); - - while (ord($marker) != 0xda) - { - if (ord($marker) == 0) - { Log("Marker scan failed"); goto done; } - - if (ord($marker) == 0xd9) - {Log("Marker scan hit end of image marker"); goto done; } - - if (JPEGSkipVariable() == 0) - { Log("JPEGSkipVariable failed"); return 0; } - - $marker = JPEGNextMarker(); - } - -done: - close(FILE); -} - -# sucessful package load -1; - -__END__ - -=head1 NAME - -Image::IPTCInfo - Perl extension for extracting IPTC image meta-data - -=head1 SYNOPSIS - - use Image::IPTCInfo; - - # Create new info object - my $info = new Image::IPTCInfo('file-name-here.jpg'); - - # Check if file had IPTC data - unless (defined($info)) { die Image::IPTCInfo::Error(); } - - # Get list of keywords or supplemental categories... - my $keywordsRef = $info->Keywords(); - my $suppCatsRef = $info->SupplementalCategories(); - - # Get specific attributes... - my $caption = $info->Attribute('caption/abstract'); - - # Create object for file that may or may not have IPTC data. - $info = create Image::IPTCInfo('file-name-here.jpg'); - - # Add/change an attribute - $info->SetAttribute('caption/abstract', 'Witty caption here'); - - # Save new info to file - ##### See disclaimer in 'SAVING FILES' section ##### - $info->Save(); - $info->SaveAs('new-file-name.jpg'); - -=head1 DESCRIPTION - -Ever wish you add information to your photos like a caption, the place -you took it, the date, and perhaps even keywords and categories? You -already can. 
The International Press Telecommunications Council (IPTC) -defines a format for exchanging meta-information in news content, and -that includes photographs. You can embed all kinds of information in -your images. The trick is putting it to use. - -That's where this IPTCInfo Perl module comes into play. You can embed -information using many programs, including Adobe Photoshop, and -IPTCInfo will let your web server -- and other automated server -programs -- pull it back out. You can use the information directly in -Perl programs, export it to XML, or even export SQL statements ready -to be fed into a database. - -=head1 USING IPTCINFO - -Install the module as documented in the README file. You can try out -the demo program called "demo.pl" which extracts info from the images -in the "demo-images" directory. - -To integrate with your own code, simply do something like what's in -the synopsys above. - -The complete list of possible attributes is given below. These are as -specified in the IPTC IIM standard, version 4. Keywords and categories -are handled differently: since these are lists, the module allows you -to access them as Perl lists. Call Keywords() and Categories() to get -a reference to each list. - -=head2 NEW VS. CREATE - -You can either create an object using new() or create(): - - $info = new Image::IPTCInfo('file-name-here.jpg'); - $info = create Image::IPTCInfo('file-name-here.jpg'); - -new() will create a new object only if the file had IPTC data in it. -It will return undef otherwise, and you can check Error() to see what -the reason was. Using create(), on the other hand, always returns a -new IPTCInfo object if there was data or not. If there wasn't any IPTC -info there, calling Attribute() on anything will just return undef; -i.e. the info object will be more-or-less empty. - -If you're only reading IPTC data, call new(). If you want to add or -change info, call create(). 
Even if there's no useful stuff in the -info object, you can then start adding attributes and save the file. -That brings us to the next topic.... - -=head2 MODIFYING IPTC DATA - -You can modify IPTC data in JPEG files and save the file back to -disk. Here are the commands for doing so: - - # Set a given attribute - $info->SetAttribute('iptc attribute here', 'new value here'); - - # Clear the keywords or supp. categories list - $info->ClearKeywords(); - $info->ClearSupplementalCategories(); - - # Add keywords or supp. categories - $info->AddKeyword('frob'); - - # You can also add a list reference - $info->AddKeyword(['frob', 'nob', 'widget']); - -=head2 SAVING FILES - -With JPEG files you can add/change attributes, add keywords, etc., and -then call: - - $info->Save(); - $info->SaveAs('new-file-name.jpg'); - -This will save the file with the updated IPTC info. Please only run -this on *copies* of your images -- not your precious originals! -- -because I'm not liable for any corruption of your images. (If you read -software license agreements, nobody else is liable, either. Make -backups of your originals!) - -If you're into image wizardry, there are a couple handy options you -can use on saving. One feature is to trash the Adobe block of data, -which contains IPTC info, color settings, Photoshop print settings, -and stuff like that. The other is to trash all application blocks, -including stuff like EXIF and FlashPix data. This can be handy for -reducing file sizes. The options are passed as a hashref to Save() and -SaveAs(), e.g.: - - $info->Save({'discardAdobeParts' => 'on'}); - $info->SaveAs('new-file-name.jpg', {'discardAppParts' => 'on'}); - -Note that if there was IPTC info in the image, or you added some -yourself, the new image will have an Adobe part with only the IPTC -information. - -=head2 XML AND SQL EXPORT FEATURES - -IPTCInfo also allows you to easily generate XML and SQL from the image -metadata. 
For XML, call: - - $xml = $info->ExportXML('entity-name', \%extra-data, - 'optional output file name'); - -This returns XML containing all image metadata. Attribute names are -translated into XML tags, making adjustments to spaces and slashes for -compatibility. (Spaces become underbars, slashes become dashes.) You -provide an entity name; all data will be contained within this entity. -You can optionally provides a reference to a hash of extra data. This -will get put into the XML, too. (Example: you may want to put info on -the image's location into the XML.) Keys must be valid XML tag names. -You can also provide a filename, and the XML will be dumped into -there. See the "demo.pl" script for examples. - -For SQL, it goes like this: - - my %mappings = ( - 'IPTC dataset name here' => 'your table column name here', - 'caption/abstract' => 'caption', - 'city' => 'city', - 'province/state' => 'state); # etc etc etc. - - $statement = $info->ExportSQL('mytable', \%mappings, \%extra-data); - -This returns a SQL statement to insert into your given table name a -set of values from the image. You pass in a reference to a hash which -maps IPTC dataset names into column names for the database table. As -with XML export, you can also provide extra information to be stuck -into the SQL. 
- -=head1 IPTC ATTRIBUTE REFERENCE - - object name originating program - edit status program version - editorial update object cycle - urgency by-line - subject reference by-line title - category city - fixture identifier sub-location - content location code province/state - content location name country/primary location code - release date country/primary location name - release time original transmission reference - expiration date headline - expiration time credit - special instructions source - action advised copyright notice - reference service contact - reference date caption/abstract - reference number writer/editor - date created image type - time created image orientation - digital creation date language identifier - digital creation time - - custom1 - custom20: NOT STANDARD but used by Fotostation. - IPTCInfo also supports these fields. - -=head1 KNOWN BUGS - -IPTC meta-info on MacOS may be stored in the resource fork instead -of the data fork. This program will currently not scan the resource -fork. - -I have heard that some programs will embed IPTC info at the end of the -file instead of the beginning. The module will currently only look -near the front of the file. If you have a file with IPTC data that -IPTCInfo can't find, please contact me! I would like to ensure -IPTCInfo works with everyone's files. - -=head1 AUTHOR - -Josh Carter, josh@multipart-mixed.com - -=head1 SEE ALSO - -perl(1). - -=cut diff --git a/IPTCInfo-1.9.4.pm b/IPTCInfo-1.9.4.pm deleted file mode 100644 index f51a720..0000000 --- a/IPTCInfo-1.9.4.pm +++ /dev/null @@ -1,1546 +0,0 @@ -# IPTCInfo: extractor for IPTC metadata embedded in images -# Copyright (C) 2000-2004 Josh Carter -# All rights reserved. -# -# This program is free software; you can redistribute it and/or modify -# it under the same terms as Perl itself. 
- -package Image::IPTCInfo; -use IO::File; - -use vars qw($VERSION); -$VERSION = '1.94'; - -# -# Global vars -# -use vars ('%datasets', # master list of dataset id's - '%datanames', # reverse mapping (for saving) - '%listdatasets', # master list of repeating dataset id's - '%listdatanames', # reverse - '$MAX_FILE_OFFSET', # maximum offset for blind scan - ); - -$MAX_FILE_OFFSET = 8192; # default blind scan depth - -# Debug off for production use -my $debugMode = 0; -my $error; - -##################################### -# These names match the codes defined in ITPC's IIM record 2. -# This hash is for non-repeating data items; repeating ones -# are in %listdatasets below. -%datasets = ( -# 0 => 'record version', # skip -- binary data - 5 => 'object name', - 7 => 'edit status', - 8 => 'editorial update', - 10 => 'urgency', - 12 => 'subject reference', - 15 => 'category', -# 20 => 'supplemental category', # in listdatasets (see below) - 22 => 'fixture identifier', -# 25 => 'keywords', # in listdatasets - 26 => 'content location code', - 27 => 'content location name', - 30 => 'release date', - 35 => 'release time', - 37 => 'expiration date', - 38 => 'expiration time', - 40 => 'special instructions', - 42 => 'action advised', - 45 => 'reference service', - 47 => 'reference date', - 50 => 'reference number', - 55 => 'date created', - 60 => 'time created', - 62 => 'digital creation date', - 63 => 'digital creation time', - 65 => 'originating program', - 70 => 'program version', - 75 => 'object cycle', - 80 => 'by-line', - 85 => 'by-line title', - 90 => 'city', - 92 => 'sub-location', - 95 => 'province/state', - 100 => 'country/primary location code', - 101 => 'country/primary location name', - 103 => 'original transmission reference', - 105 => 'headline', - 110 => 'credit', - 115 => 'source', - 116 => 'copyright notice', -# 118 => 'contact', # in listdatasets - 120 => 'caption/abstract', - 121 => 'local caption', - 122 => 'writer/editor', -# 125 => 'rasterized caption', # 
unsupported (binary data) - 130 => 'image type', - 131 => 'image orientation', - 135 => 'language identifier', - 200 => 'custom1', # These are NOT STANDARD, but are used by - 201 => 'custom2', # Fotostation. Use at your own risk. They're - 202 => 'custom3', # here in case you need to store some special - 203 => 'custom4', # stuff, but note that other programs won't - 204 => 'custom5', # recognize them and may blow them away if - 205 => 'custom6', # you open and re-save the file. (Except with - 206 => 'custom7', # Fotostation, of course.) - 207 => 'custom8', - 208 => 'custom9', - 209 => 'custom10', - 210 => 'custom11', - 211 => 'custom12', - 212 => 'custom13', - 213 => 'custom14', - 214 => 'custom15', - 215 => 'custom16', - 216 => 'custom17', - 217 => 'custom18', - 218 => 'custom19', - 219 => 'custom20', - ); - -# this will get filled in if we save data back to file -%datanames = (); - -%listdatasets = ( - 20 => 'supplemental category', - 25 => 'keywords', - 118 => 'contact', - ); - -# this will get filled in if we save data back to file -%listdatanames = (); - -####################################################################### -# New, Save, Destroy, Error -####################################################################### - -# -# new -# -# $info = new IPTCInfo('image filename goes here') -# -# Returns IPTCInfo object filled with metadata from the given image -# file. File on disk will be closed, and changes made to the IPTCInfo -# object will *not* be flushed back to disk. -# -sub new -{ - my ($pkg, $file, $force) = @_; - - my $input_is_handle = eval {$file->isa('IO::Handle')}; - if ($input_is_handle and not $file->isa('IO::Seekable')) { - $error = "Handle must be seekable."; Log($error); - return undef; - } - - # - # Open file and snarf data from it. - # - my $handle = $input_is_handle ? 
$file : IO::File->new($file); - unless($handle) - { - $error = "Can't open file: $!"; Log($error); - return undef; - } - - binmode($handle); - - my $datafound = ScanToFirstIMMTag($handle); - unless ($datafound || defined($force)) - { - $error = "No IPTC data found."; Log($error); - # don't close unless we opened it - $handle->close() unless $input_is_handle; - return undef; - } - - my $self = bless - { - '_data' => {}, # empty hashes; wil be - '_listdata' => {}, # filled in CollectIIMInfo - '_handle' => $handle, - }, $pkg; - - $self->{_filename} = $file unless $input_is_handle; - - # Do the real snarfing here - $self->CollectIIMInfo() if $datafound; - - $handle->close() unless $input_is_handle; - - return $self; -} - -# -# create -# -# Like new, but forces an object to always be returned. This allows -# you to start adding stuff to files that don't have IPTC info and then -# save it. -# -sub create -{ - my ($pkg, $filename) = @_; - - return new($pkg, $filename, 'force'); -} - -# -# Save -# -# Saves JPEG with IPTC data back to the same file it came from. -# -sub Save -{ - my ($self, $options) = @_; - - return $self->SaveAs($self->{'_filename'}, $options); -} - -# -# Save -# -# Saves JPEG with IPTC data to a given file name. -# -sub SaveAs -{ - my ($self, $newfile, $options) = @_; - - # - # Open file and snarf data from it. - # - my $handle = $self->{_filename} ? IO::File->new($self->{_filename}) : $self->{_handle}; - unless($handle) - { - $error = "Can't open file: $!"; Log($error); - return undef; - } - - $handle->seek(0, 0); - binmode($handle); - - unless (FileIsJPEG($handle)) - { - $error = "Source file is not a JPEG; I can only save JPEGs. 
Sorry."; - Log($error); - return undef; - } - - my $ret = JPEGCollectFileParts($handle, $options); - - if ($ret == 0) - { - Log("collectfileparts failed"); - return undef; - } - - if ($self->{_filename}) { - $handle->close(); - unless ($handle = IO::File->new($newfile, ">")) { - $error = "Can't open output file: $!"; Log($error); - return undef; - } - binmode($handle); - } else { - unless ($handle->truncate(0)) { - $error = "Can't truncate, handle might be read-only"; Log($error); - return undef; - } - } - - my ($start, $end, $adobe) = @$ret; - - if (defined($options) && defined($options->{'discardAdobeParts'})) - { - undef $adobe; - } - - - $handle->print($start); - $handle->print($self->PhotoshopIIMBlock($adobe, $self->PackedIIMData())); - $handle->print($end); - - $handle->close() if $self->{_filename}; - - return 1; -} - -# -# DESTROY -# -# Called when object is destroyed. No action necessary in this case. -# -sub DESTROY -{ - # no action necessary -} - -# -# Error -# -# Returns description of the last error. -# -sub Error -{ - return $error; -} - -####################################################################### -# Attributes for clients -####################################################################### - -# -# Attribute/SetAttribute -# -# Returns/Changes value of a given data item. -# -sub Attribute -{ - my ($self, $attribute) = @_; - - return $self->{_data}->{$attribute}; -} - -sub SetAttribute -{ - my ($self, $attribute, $newval) = @_; - - $self->{_data}->{$attribute} = $newval; -} - -sub ClearAttributes -{ - my $self = shift; - - $self->{_data} = {}; -} - -sub ClearAllData -{ - my $self = shift; - - $self->{_data} = {}; - $self->{_listdata} = {}; -} - -# -# Keywords/Clear/Add -# -# Returns reference to a list of keywords/clears the keywords -# list/adds a keyword. 
-# -sub Keywords -{ - my $self = shift; - return $self->{_listdata}->{'keywords'}; -} - -sub ClearKeywords -{ - my $self = shift; - $self->{_listdata}->{'keywords'} = undef; -} - -sub AddKeyword -{ - my ($self, $add) = @_; - - $self->AddListData('keywords', $add); -} - -# -# SupplementalCategories/Clear/Add -# -# Returns reference to a list of supplemental categories. -# -sub SupplementalCategories -{ - my $self = shift; - return $self->{_listdata}->{'supplemental category'}; -} - -sub ClearSupplementalCategories -{ - my $self = shift; - $self->{_listdata}->{'supplemental category'} = undef; -} - -sub AddSupplementalCategories -{ - my ($self, $add) = @_; - - $self->AddListData('supplemental category', $add); -} - -# -# Contacts/Clear/Add -# -# Returns reference to a list of contactss/clears the contacts -# list/adds a contact. -# -sub Contacts -{ - my $self = shift; - return $self->{_listdata}->{'contact'}; -} - -sub ClearContacts -{ - my $self = shift; - $self->{_listdata}->{'contact'} = undef; -} - -sub AddContact -{ - my ($self, $add) = @_; - - $self->AddListData('contact', $add); -} - -sub AddListData -{ - my ($self, $list, $add) = @_; - - # did user pass in a list ref? - if (ref($add) eq 'ARRAY') - { - # yes, add list contents - push(@{$self->{_listdata}->{$list}}, @$add); - } - else - { - # no, just a literal item - push(@{$self->{_listdata}->{$list}}, $add); - } -} - -####################################################################### -# XML, SQL export -####################################################################### - -# -# ExportXML -# -# $xml = $info->ExportXML('entity-name', \%extra-data, -# 'optional output file name'); -# -# Exports XML containing all image metadata. Attribute names are -# translated into XML tags, making adjustments to spaces and slashes -# for compatibility. (Spaces become underbars, slashes become dashes.) -# Caller provides an entity name; all data will be contained within -# this entity. 
Caller optionally provides a reference to a hash of -# extra data. This will be output into the XML, too. Keys must be -# valid XML tag names. Optionally provide a filename, and the XML -# will be dumped into there. -# -sub ExportXML -{ - my ($self, $basetag, $extraRef, $filename) = @_; - my $out; - - $basetag = 'photo' unless length($basetag); - - $out .= "<$basetag>\n"; - - # dump extra info first, if any - foreach my $key (keys %$extraRef) - { - $out .= "\t<$key>" . $extraRef->{$key} . "\n"; - } - - # dump our stuff - foreach my $key (keys %{$self->{_data}}) - { - my $cleankey = $key; - $cleankey =~ s/ /_/g; - $cleankey =~ s/\//-/g; - - $out .= "\t<$cleankey>" . $self->{_data}->{$key} . "\n"; - } - - if (defined ($self->Keywords())) - { - # print keywords - $out .= "\t\n"; - - foreach my $keyword (@{$self->Keywords()}) - { - $out .= "\t\t$keyword\n"; - } - - $out .= "\t\n"; - } - - if (defined ($self->SupplementalCategories())) - { - # print supplemental categories - $out .= "\t\n"; - - foreach my $category (@{$self->SupplementalCategories()}) - { - $out .= "\t\t$category\n"; - } - - $out .= "\t\n"; - } - - if (defined ($self->Contacts())) - { - # print contacts - $out .= "\t\n"; - - foreach my $contact (@{$self->Contacts()}) - { - $out .= "\t\t$contact\n"; - } - - $out .= "\t\n"; - } - - # close base tag - $out .= "\n"; - - # export to file if caller asked for it. - if (length($filename)) - { - open(XMLOUT, ">$filename"); - print XMLOUT $out; - close(XMLOUT); - } - - return $out; -} - -# -# ExportSQL -# -# my %mappings = ( -# 'IPTC dataset name here' => 'your table column name here', -# 'caption/abstract' => 'caption', -# 'city' => 'city', -# 'province/state' => 'state); # etc etc etc. -# -# $statement = $info->ExportSQL('mytable', \%mappings, \%extra-data); -# -# Returns a SQL statement to insert into your given table name -# a set of values from the image. 
Caller passes in a reference to -# a hash which maps IPTC dataset names into column names for the -# database table. Optionally pass in a ref to a hash of extra data -# which will also be included in the insert statement. Keys in that -# hash must be valid column names. -# -sub ExportSQL -{ - my ($self, $tablename, $mappingsRef, $extraRef) = @_; - my ($statement, $columns, $values); - - return undef if (($tablename eq undef) || ($mappingsRef eq undef)); - - # start with extra data, if any - foreach my $column (keys %$extraRef) - { - my $value = $extraRef->{$column}; - $value =~ s/'/''/g; # escape single quotes - - $columns .= $column . ", "; - $values .= "\'$value\', "; - } - - # process our data - foreach my $attribute (keys %$mappingsRef) - { - my $value = $self->Attribute($attribute); - $value =~ s/'/''/g; # escape single quotes - - $columns .= $mappingsRef->{$attribute} . ", "; - $values .= "\'$value\', "; - } - - # must trim the trailing ", " from both - $columns =~ s/, $//; - $values =~ s/, $//; - - $statement = "INSERT INTO $tablename ($columns) VALUES ($values)"; - - return $statement; -} - -####################################################################### -# File parsing functions (private) -####################################################################### - -# -# ScanToFirstIMMTag -# -# Scans to first IIM Record 2 tag in the file. The will either use -# smart scanning for JPEGs or blind scanning for other file types. -# -sub ScanToFirstIMMTag -{ - my $handle = shift @_; - - if (FileIsJPEG($handle)) - { - Log("File is JPEG, proceeding with JPEGScan"); - return JPEGScan($handle); - } - else - { - Log("File not a JPEG, trying BlindScan"); - return BlindScan($handle); - } -} - -# -# FileIsJPEG -# -# Checks to see if this file is a JPEG/JFIF or not. Will reset the -# file position back to 0 after it's done in either case. 
-# -sub FileIsJPEG -{ - my $handle = shift @_; - - # reset to beginning just in case - $handle->seek(0, 0); - - if ($debugMode) - { - Log("Opening 16 bytes of file:\n"); - my $dump; - $handle->read($dump, 16); - HexDump($dump); - $handle->seek(0, 0); - } - - # check start of file marker - my ($ff, $soi); - $handle->read($ff, 1) || goto notjpeg; - $handle->read($soi, 1); - - goto notjpeg unless (ord($ff) == 0xff && ord($soi) == 0xd8); - - # now check for APP0 marker. I'll assume that anything with a SOI - # followed by APP0 is "close enough" for our purposes. (We're not - # dinking with image data, so anything following the JPEG tagging - # system should work.) - my ($app0, $len, $jpeg); - $handle->read($ff, 1); - $handle->read($app0, 1); - - goto notjpeg unless (ord($ff) == 0xff); - - # reset to beginning of file - $handle->seek(0, 0); - return 1; - - notjpeg: - $handle->seek(0, 0); - return 0; -} - -# -# JPEGScan -# -# Assuming the file is a JPEG (see above), this will scan through the -# markers looking for the APP13 marker, where IPTC/IIM data should be -# found. While this isn't a formally defined standard, all programs -# have (supposedly) adopted Adobe's technique of putting the data in -# APP13. -# -sub JPEGScan -{ - my $handle = shift @_; - - # Skip past start of file marker - my ($ff, $soi); - $handle->read($ff, 1) || return 0; - $handle->read($soi, 1); - - unless (ord($ff) == 0xff && ord($soi) == 0xd8) - { - $error = "JPEGScan: invalid start of file"; Log($error); - return 0; - } - - # Scan for the APP13 marker which will contain our IPTC info (I hope). 
- - my $marker = JPEGNextMarker($handle); - - while (ord($marker) != 0xed) - { - if (ord($marker) == 0) - { $error = "Marker scan failed"; Log($error); return 0; } - - if (ord($marker) == 0xd9) - { $error = "Marker scan hit end of image marker"; - Log($error); return 0; } - - if (ord($marker) == 0xda) - { $error = "Marker scan hit start of image data"; - Log($error); return 0; } - - if (JPEGSkipVariable($handle) == 0) - { $error = "JPEGSkipVariable failed"; - Log($error); return 0; } - - $marker = JPEGNextMarker($handle); - } - - # If were's here, we must have found the right marker. Now - # BlindScan through the data. - return BlindScan($handle, JPEGGetVariableLength($handle)); -} - -# -# JPEGNextMarker -# -# Scans to the start of the next valid-looking marker. Return value is -# the marker id. -# -sub JPEGNextMarker -{ - my $handle = shift @_; - - my $byte; - - # Find 0xff byte. We should already be on it. - $handle->read($byte, 1) || return 0; - while (ord($byte) != 0xff) - { - Log("JPEGNextMarker: warning: bogus stuff in JPEG file"); - $handle->read($byte, 1) || return 0; - } - - # Now skip any extra 0xffs, which are valid padding. - do - { - $handle->read($byte, 1) || return 0; - } while (ord($byte) == 0xff); - - # $byte should now contain the marker id. - Log("JPEGNextMarker: at marker " . unpack("H*", $byte)); - return $byte; -} - -# -# JPEGGetVariableLength -# -# Gets length of current variable-length section. File position at -# start must be on the marker itself, e.g. immediately after call to -# JPEGNextMarker. File position is updated to just past the length -# field. 
-# -sub JPEGGetVariableLength -{ - my $handle = shift @_; - - # Get the marker parameter length count - my $length; - $handle->read($length, 2) || return 0; - - ($length) = unpack("n", $length); - - Log("JPEG variable length: $length"); - - # Length includes itself, so must be at least 2 - if ($length < 2) - { - Log("JPEGGetVariableLength: erroneous JPEG marker length"); - return 0; - } - $length -= 2; - - return $length; -} - -# -# JPEGSkipVariable -# -# Skips variable-length section of JPEG block. Should always be called -# between calls to JPEGNextMarker to ensure JPEGNextMarker is at the -# start of data it can properly parse. -# -sub JPEGSkipVariable -{ - my $handle = shift; - my $rSave = shift; - - my $length = JPEGGetVariableLength($handle); - return if ($length == 0); - - # Skip remaining bytes - my $temp; - if (defined($rSave) || $debugMode) - { - unless ($handle->read($temp, $length)) - { - Log("JPEGSkipVariable: read failed while skipping var data"); - return 0; - } - - # prints out a heck of a lot of stuff - # HexDump($temp); - } - else - { - # Just seek - unless($handle->seek($length, 1)) - { - Log("JPEGSkipVariable: read failed while skipping var data"); - return 0; - } - } - - $$rSave = $temp if defined($rSave); - - return 1; -} - -# -# BlindScan -# -# Scans blindly to first IIM Record 2 tag in the file. This method may -# or may not work on any arbitrary file type, but it doesn't hurt to -# check. We expect to see this tag within the first 8k of data. (This -# limit may need to be changed or eliminated depending on how other -# programs choose to store IIM.) 
-# -sub BlindScan -{ - my $handle = shift; - my $maxoff = shift() || $MAX_FILE_OFFSET; - - Log("BlindScan: starting scan, max length $maxoff"); - - # start digging - my $offset = 0; - while ($offset <= $maxoff) - { - my $temp; - - unless ($handle->read($temp, 1)) - { - Log("BlindScan: hit EOF while scanning"); - return 0; - } - - # look for tag identifier 0x1c - if (ord($temp) == 0x1c) - { - # if we found that, look for record 2, dataset 0 - # (record version number) - my ($record, $dataset); - $handle->read($record, 1); - $handle->read($dataset, 1); - - if (ord($record) == 2) - { - # found it. seek to start of this tag and return. - Log("BlindScan: found IIM start at offset $offset"); - $handle->seek(-3, 1); # seek rel to current position - return $offset; - } - else - { - # didn't find it. back up 2 to make up for - # those reads above. - $handle->seek(-2, 1); # seek rel to current position - } - } - - # no tag, keep scanning - $offset++; - } - - return 0; -} - -# -# CollectIIMInfo -# -# Assuming file is seeked to start of IIM data (using above), this -# reads all the data into our object's hashes -# -sub CollectIIMInfo -{ - my $self = shift; - - my $handle = $self->{_handle}; - - # NOTE: file should already be at the start of the first - # IPTC code: record 2, dataset 0. - - while (1) - { - my $header; - return unless $handle->read($header, 5); - - ($tag, $record, $dataset, $length) = unpack("CCCn", $header); - - # bail if we're past end of IIM record 2 data - return unless ($tag == 0x1c) && ($record == 2); - - # print "tag : " . $tag . "\n"; - # print "record : " . $record . "\n"; - # print "dataset : " . $dataset . "\n"; - # print "length : " . $length . "\n"; - - my $value; - $handle->read($value, $length); - - # try to extract first into _listdata (keywords, categories) - # and, if unsuccessful, into _data. Tags which are not in the - # current IIM spec (version 4) are currently discarded. 
- if (exists $listdatasets{$dataset}) - { - my $dataname = $listdatasets{$dataset}; - my $listref = $listdata{$dataname}; - - push(@{$self->{_listdata}->{$dataname}}, $value); - } - elsif (exists $datasets{$dataset}) - { - my $dataname = $datasets{$dataset}; - - $self->{_data}->{$dataname} = $value; - } - # else discard - } -} - -####################################################################### -# File Saving -####################################################################### - -# -# JPEGCollectFileParts -# -# Collects all pieces of the file except for the IPTC info that we'll -# replace when saving. Returns the stuff before the info, stuff after, -# and the contents of the Adobe Resource Block that the IPTC data goes -# in. Returns undef if a file parsing error occured. -# -sub JPEGCollectFileParts -{ - my $handle = shift; - my ($options) = @_; - my ($start, $end, $adobeParts); - my $discardAppParts = 0; - - if (defined($options) && defined($options->{'discardAppParts'})) - { $discardAppParts = 1; } - - # Start at beginning of file - $handle->seek(0, 0); - - # Skip past start of file marker - my ($ff, $soi); - $handle->read($ff, 1) || return 0; - $handle->read($soi, 1); - - unless (ord($ff) == 0xff && ord($soi) == 0xd8) - { - $error = "JPEGScan: invalid start of file"; Log($error); - return 0; - } - - # - # Begin building start of file - # - $start .= pack("CC", 0xff, 0xd8); - - # Get first marker in file. This will be APP0 for JFIF or APP1 for - # EXIF. - my $marker = JPEGNextMarker($handle); - - my $app0data; - if (JPEGSkipVariable($handle, \$app0data) == 0) - { $error = "JPEGSkipVariable failed"; - Log($error); return 0; } - - if (ord($marker) == 0xe0 || !$discardAppParts) - { - # Always include APP0 marker at start if it's present. 
- $start .= pack("CC", 0xff, ord($marker)); - # Remember that the length must include itself (2 bytes) - $start .= pack("n", length($app0data) + 2); - $start .= $app0data; - } - else - { - # Manually insert APP0 if we're trashing application parts, since - # all JFIF format images should start with the version block. - $start .= pack("CC", 0xff, 0xe0); - $start .= pack("n", 16); # length (including these 2 bytes) - $start .= "JFIF"; # format - $start .= pack("CC", 1, 2); # call it version 1.2 (current JFIF) - $start .= pack(C8, 0); # zero everything else - } - - # - # Now scan through all markers in file until we hit image data or - # IPTC stuff. - # - $marker = JPEGNextMarker($handle); - - while (1) - { - if (ord($marker) == 0) - { $error = "Marker scan failed"; Log($error); return 0; } - - # Check for end of image - if (ord($marker) == 0xd9) - { - Log("JPEGCollectFileParts: saw end of image marker"); - $end .= pack("CC", 0xff, ord($marker)); - goto doneScanning; - } - - # Check for start of compressed data - if (ord($marker) == 0xda) - { - Log("JPEGCollectFileParts: saw start of compressed data"); - $end .= pack("CC", 0xff, ord($marker)); - goto doneScanning; - } - - my $partdata; - if (JPEGSkipVariable($handle, \$partdata) == 0) - { $error = "JPEGSkipVariable failed"; - Log($error); return 0; } - - # Take all parts aside from APP13, which we'll replace - # ourselves. 
- if ($discardAppParts && ord($marker) >= 0xe0 && ord($marker) <= 0xef) - { - # Skip all application markers, including Adobe parts - undef $adobeParts; - } - elsif (ord($marker) == 0xed) - { - # Collect the adobe stuff from part 13 - $adobeParts = CollectAdobeParts($partdata); - goto doneScanning; - } - else - { - # Append all other parts to start section - $start .= pack("CC", 0xff, ord($marker)); - $start .= pack("n", length($partdata) + 2); - $start .= $partdata; - } - - $marker = JPEGNextMarker($handle); - } - - doneScanning: - - # - # Append rest of file to $end - # - my $buffer; - - while ($handle->read($buffer, 16384)) - { - $end .= $buffer; - } - - return [$start, $end, $adobeParts]; -} - -# -# CollectAdobeParts -# -# Part APP13 contains yet another markup format, one defined by Adobe. -# See "File Formats Specification" in the Photoshop SDK (avail from -# www.adobe.com). We must take everything but the IPTC data so that -# way we can write the file back without losing everything else -# Photoshop stuffed into the APP13 block. 
#
# Input:   $data - raw APP13 payload, beginning with the 14-byte
#          "Photoshop 3.0\0" preamble.
# Returns: every resource block except IIM (id 0x0404), re-serialized
#          back to back; '' when nothing else was found.
#
sub CollectAdobeParts
{
    my ($data) = @_;
    my $length = length($data);
    my $out    = '';

    # Skip preamble
    my $offset = length('Photoshop 3.0 ');

    # Walk the resource blocks. Each bounds check bails out as soon as
    # the block turns out to be truncated.
    while ($offset < $length) {
        # Get OSType and ID
        my ($ostype, $id1, $id2) = unpack("NCC", substr($data, $offset, 6));
        $offset += 6;
        last unless ($offset < $length);

        # Get pascal string (one length byte, then the name itself)
        my ($stringlen) = unpack("C", substr($data, $offset, 1));
        $offset += 1;
        last unless ($offset < $length);

        my $string = substr($data, $offset, $stringlen);
        $offset += $stringlen;

        # The length byte plus the name is padded to an even size, so a
        # pad byte follows exactly when the name length is even. This
        # includes the empty name, which is stored as two zero bytes.
        my $namepad = ($stringlen % 2 == 0) ? 1 : 0;
        $offset += $namepad;
        last unless ($offset < $length);

        # Get variable-size data
        my ($size) = unpack("N", substr($data, $offset, 4));
        $offset += 4;
        last unless ($offset < $length);

        my $var = substr($data, $offset, $size);
        $offset += $size;
        $offset++ if ($size % 2 != 0);    # data is padded to even size

        # skip IIM data (0x0404), but write everything else out
        next if ($id1 == 4 && $id2 == 4);

        $out .= pack("NCC", $ostype, $id1, $id2);
        $out .= pack("C", $stringlen);
        $out .= $string;
        $out .= pack("C", 0) if $namepad;
        $out .= pack("N", $size);
        $out .= $var;
        $out .= pack("C", 0) if ($size % 2 != 0 && length($out) % 2 != 0);
    }

    return $out;
}

#
# PackedIIMData
#
# Assembles and returns our _data and _listdata into IIM format for
# embedding into an image.
#
# Reads $self->{_data} (name => value) and $self->{_listdata}
# (name => array ref). Names missing from %datasets / %listdatasets
# are logged and skipped. Undefined and empty values are skipped; the
# string "0" is a real value and is written.
#
sub PackedIIMData
{
    my $self = shift;
    my $out;

    # First, we need to build a mapping of datanames to dataset
    # numbers if we haven't already.
    unless (scalar(keys %datanames)) {
        foreach my $dataset (keys %datasets) {
            $datanames{ $datasets{$dataset} } = $dataset;
        }
    }

    # Ditto for the lists
    unless (scalar(keys %listdatanames)) {
        foreach my $dataset (keys %listdatasets) {
            $listdatanames{ $listdatasets{$dataset} } = $dataset;
        }
    }

    # Every dataset goes out as: tag 0x1c, record 2, dataset number,
    # 16-bit big-endian length, then the raw value.
    my ($tag, $record) = (0x1c, 0x02);

    # Print record version
    # tag - record - dataset - len (short) - 2 (short)
    $out .= pack("CCCnn", $tag, $record, 0, 2, 2);

    # Iterate over data sets
    foreach my $key (keys %{$self->{_data}}) {
        my $dataset = $datanames{$key};
        my $value   = $self->{_data}->{$key};

        unless ($dataset) {
            Log("PackedIIMData: illegal dataname $key");
            next;
        }

        next unless (defined($value) && length($value));

        $out .= pack("CCCn", $tag, $record, $dataset, length($value));
        $out .= $value;
    }

    # Do the same for list data sets
    foreach my $key (keys %{$self->{_listdata}}) {
        my $dataset = $listdatanames{$key};

        unless ($dataset) {
            Log("PackedIIMData: illegal dataname $key");
            next;
        }

        foreach my $value (@{$self->{_listdata}->{$key}}) {
            next unless (defined($value) && length($value));

            $out .= pack("CCCn", $tag, $record, $dataset, length($value));
            $out .= $value;
        }
    }

    return $out;
}

#
# PhotoshopIIMBlock
#
# Assembles the blob of Photoshop "resource data" that includes our
# fresh IIM data (from PackedIIMData) and the other Adobe parts we
# found in the file, if there were any.
#
# Input:   $otherparts - already-serialized non-IPTC resource blocks
#                        (may be undef)
#          $data       - packed IIM data
# Returns: a complete APP13 segment, marker and length included.
#
sub PhotoshopIIMBlock
{
    my ($self, $otherparts, $data) = @_;

    my $resourceBlock = join('',
        "Photoshop 3.0", pack("C", 0),
        "8BIM",                             # Photoshop identifier
        pack("CCCC", 0x04, 0x04, 0, 0),     # 0x0404 = IIM, then empty name
        pack("N", length($data)),           # 32-bit length, network order
        (defined($data) ? $data : ''),
        # pad with a blank if not even size
        ((length($data) % 2 != 0) ? pack("C", 0) : ''),
        # finally, whatever else lived in the original block
        (defined($otherparts) ? $otherparts : ''),
    );

    # JPEG start of block (APP13), then a length that counts itself
    return pack("CC", 0xff, 0xed)
         . pack("n", length($resourceBlock) + 2)
         . $resourceBlock;
}

#######################################################################
# Helpers, docs
#######################################################################

#
# Log: just prints a message to STDERR if $debugMode is on.
#
sub Log
{
    my ($message) = @_;
    print STDERR "**IPTC** $message\n" if ($debugMode);
}

#
# HexDump
#
# Very helpful when debugging. Writes the given string to STDERR,
# sixteen bytes per row: hex column, separator, printable column.
#
sub HexDump
{
    my $dump = shift;
    my ($hexcol, $txtcol);
    my $seen = 0;

    foreach my $char (split(//, $dump)) {
        $seen++;

        $hexcol .= " " . unpack("H*", $char);

        my $code = ord($char);
        $txtcol .= ($code >= 0x21 && $code <= 0x7e) ? " $char" : " .";

        # flush a full row
        next unless ($seen % 16 == 0);
        print STDERR $hexcol . " | " . $txtcol . "\n";
        undef $hexcol; undef $txtcol;
    }

    # flush whatever is left of a partial row
    if (defined($hexcol) || defined($txtcol)) {
        print STDERR $hexcol . " | " . $txtcol . "\n";
        undef $hexcol; undef $txtcol;
    }
}

#
# JPEGDebugScan
#
# Also very helpful when debugging.
#
# Input:   $filename - path of the JPEG file to walk.
# Returns: 0 when the file is empty or a segment cannot be skipped;
#          otherwise the result of closing the handle.
# Dies if the file cannot be opened.
#
sub JPEGDebugScan
{
    my $filename = shift;
    my $handle = IO::File->new($filename);
    $handle or die "Can't open $filename: $!";
    binmode($handle);

    # Skip past start of file marker
    my ($ff, $soi);
    unless ($handle->read($ff, 1)) {
        $handle->close();
        return 0;
    }
    $handle->read($soi, 1);

    if (ord($ff) == 0xff && ord($soi) == 0xd8) {
        # scan to 0xDA (start of scan), dumping the markers we see
        # between here and there.
        my $marker = JPEGNextMarker($handle);

        while (ord($marker) != 0xda) {
            if (ord($marker) == 0) {
                Log("Marker scan failed");
                last;
            }

            if (ord($marker) == 0xd9) {
                Log("Marker scan hit end of image marker");
                last;
            }

            if (JPEGSkipVariable($handle) == 0) {
                Log("JPEGSkipVariable failed");
                # don't leak the handle on the error path
                $handle->close();
                return 0;
            }

            $marker = JPEGNextMarker($handle);
        }
    }
    else {
        Log("JPEGScan: invalid start of file");
    }

    return $handle->close();
}

# successful package load
1;

__END__

=head1 NAME

Image::IPTCInfo - Perl extension for extracting IPTC image meta-data

=head1 SYNOPSIS

  use Image::IPTCInfo;

  # Create new info object
  my $info = new Image::IPTCInfo('file-name-here.jpg');

  # Check if file had IPTC data
  unless (defined($info)) { die Image::IPTCInfo::Error(); }

  # Get list of keywords, supplemental categories, or contacts
  my $keywordsRef = $info->Keywords();
  my $suppCatsRef = $info->SupplementalCategories();
  my $contactsRef = $info->Contacts();

  # Get specific attributes...
  my $caption = $info->Attribute('caption/abstract');

  # Create object for file that may or may not have IPTC data.
- $info = create Image::IPTCInfo('file-name-here.jpg'); - - # Add/change an attribute - $info->SetAttribute('caption/abstract', 'Witty caption here'); - - # Save new info to file - ##### See disclaimer in 'SAVING FILES' section ##### - $info->Save(); - $info->SaveAs('new-file-name.jpg'); - -=head1 DESCRIPTION - -Ever wish you could add information to your photos like a caption, the place -you took it, the date, and perhaps even keywords and categories? You -already can. The International Press Telecommunications Council (IPTC) -defines a format for exchanging meta-information in news content, and -that includes photographs. You can embed all kinds of information in -your images. The trick is putting it to use. - -That's where this IPTCInfo Perl module comes into play. You can embed -information using many programs, including Adobe Photoshop, and -IPTCInfo will let your web server -- and other automated server -programs -- pull it back out. You can use the information directly in -Perl programs, export it to XML, or even export SQL statements ready -to be fed into a database. - -=head1 USING IPTCINFO - -Install the module as documented in the README file. You can try out -the demo program called "demo.pl" which extracts info from the images -in the "demo-images" directory. - -To integrate with your own code, simply do something like what's in -the synopsis above. - -The complete list of possible attributes is given below. These are as -specified in the IPTC IIM standard, version 4. Keywords and categories -are handled differently: since these are lists, the module allows you -to access them as Perl lists. Call Keywords() and SupplementalCategories() to get -a reference to each list. - -=head2 NEW VS. CREATE - -You can either create an object using new() or create(): - - $info = new Image::IPTCInfo('file-name-here.jpg'); - $info = create Image::IPTCInfo('file-name-here.jpg'); - -new() will create a new object only if the file had IPTC data in it.
-It will return undef otherwise, and you can check Error() to see what -the reason was. Using create(), on the other hand, always returns a -new IPTCInfo object whether there was data or not. If there wasn't any IPTC -info there, calling Attribute() on anything will just return undef; -i.e. the info object will be more-or-less empty. - -If you're only reading IPTC data, call new(). If you want to add or -change info, call create(). Even if there's no useful stuff in the -info object, you can then start adding attributes and save the file. -That brings us to the next topic.... - -=head2 MODIFYING IPTC DATA - -You can modify IPTC data in JPEG files and save the file back to -disk. Here are the commands for doing so: - - # Set a given attribute - $info->SetAttribute('iptc attribute here', 'new value here'); - - # Clear the keywords, supp. categories, or contacts list - $info->ClearKeywords(); - $info->ClearSupplementalCategories(); - $info->ClearContacts(); - - # Add keywords or supp. categories - $info->AddKeyword('frob'); - - # You can also add a list reference - $info->AddKeyword(['frob', 'nob', 'widget']); - -=head2 SAVING FILES - -With JPEG files you can add/change attributes, add keywords, etc., and -then call: - - $info->Save(); - $info->SaveAs('new-file-name.jpg'); - -This will save the file with the updated IPTC info. Please only run -this on *copies* of your images -- not your precious originals! -- -because I'm not liable for any corruption of your images. (If you read -software license agreements, nobody else is liable, either. Make -backups of your originals!) - -If you're into image wizardry, there are a couple of handy options you -can use on saving. One feature is to trash the Adobe block of data, -which contains IPTC info, color settings, Photoshop print settings, -and stuff like that. The other is to trash all application blocks, -including stuff like EXIF and FlashPix data. This can be handy for -reducing file sizes.
The options are passed as a hashref to Save() and -SaveAs(), e.g.: - - $info->Save({'discardAdobeParts' => 'on'}); - $info->SaveAs('new-file-name.jpg', {'discardAppParts' => 'on'}); - -Note that if there was IPTC info in the image, or you added some -yourself, the new image will have an Adobe part with only the IPTC -information. - -=head2 XML AND SQL EXPORT FEATURES - -IPTCInfo also allows you to easily generate XML and SQL from the image -metadata. For XML, call: - - $xml = $info->ExportXML('entity-name', \%extra-data, - 'optional output file name'); - -This returns XML containing all image metadata. Attribute names are -translated into XML tags, making adjustments to spaces and slashes for -compatibility. (Spaces become underbars, slashes become dashes.) You -provide an entity name; all data will be contained within this entity. -You can optionally provide a reference to a hash of extra data. This -will get put into the XML, too. (Example: you may want to put info on -the image's location into the XML.) Keys must be valid XML tag names. -You can also provide a filename, and the XML will be dumped into -there. See the "demo.pl" script for examples. - -For SQL, it goes like this: - - my %mappings = ( - 'IPTC dataset name here' => 'your table column name here', - 'caption/abstract' => 'caption', - 'city' => 'city', - 'province/state' => 'state'); # etc etc etc. - - $statement = $info->ExportSQL('mytable', \%mappings, \%extra-data); - -This returns a SQL statement to insert into your given table name a -set of values from the image. You pass in a reference to a hash which -maps IPTC dataset names into column names for the database table. As -with XML export, you can also provide extra information to be stuck -into the SQL.
- -=head1 IPTC ATTRIBUTE REFERENCE - - object name originating program - edit status program version - editorial update object cycle - urgency by-line - subject reference by-line title - category city - fixture identifier sub-location - content location code province/state - content location name country/primary location code - release date country/primary location name - release time original transmission reference - expiration date headline - expiration time credit - special instructions source - action advised copyright notice - reference service contact - reference date caption/abstract - reference number local caption - date created writer/editor - time created image type - digital creation date image orientation - digital creation time language identifier - - custom1 - custom20: NOT STANDARD but used by Fotostation. - IPTCInfo also supports these fields. - -=head1 KNOWN BUGS - -IPTC meta-info on MacOS may be stored in the resource fork instead -of the data fork. This program will currently not scan the resource -fork. - -I have heard that some programs will embed IPTC info at the end of the -file instead of the beginning. The module will currently only look -near the front of the file. If you have a file with IPTC data that -IPTCInfo can't find, please contact me! I would like to ensure -IPTCInfo works with everyone's files. - -=head1 AUTHOR - -Josh Carter, josh@multipart-mixed.com - -=head1 SEE ALSO - -perl(1). - -=cut diff --git a/IPTCInfo-1.95.pm b/IPTCInfo-1.95.pm deleted file mode 100644 index 1a7f820..0000000 --- a/IPTCInfo-1.95.pm +++ /dev/null @@ -1,1546 +0,0 @@ -# IPTCInfo: extractor for IPTC metadata embedded in images -# Copyright (C) 2000-2004 Josh Carter -# All rights reserved. -# -# This program is free software; you can redistribute it and/or modify -# it under the same terms as Perl itself. 
package Image::IPTCInfo;
use IO::File;

use vars qw($VERSION);
$VERSION = '1.95';

#
# Global vars
#
use vars ('%datasets',         # master list of dataset id's
          '%datanames',        # reverse mapping (for saving)
          '%listdatasets',     # master list of repeating dataset id's
          '%listdatanames',    # reverse
          '$MAX_FILE_OFFSET',  # maximum offset for blind scan
          );

$MAX_FILE_OFFSET = 8192; # default blind scan depth

# Debug off for production use
my $debugMode = 0;
my $error;

#####################################
# These names match the codes defined in IPTC's IIM record 2.
# This hash is for non-repeating data items; repeating ones
# are in %listdatasets below.
%datasets = (
#   0   => 'record version',            # skip -- binary data
    5   => 'object name',
    7   => 'edit status',
    8   => 'editorial update',
    10  => 'urgency',
    12  => 'subject reference',
    15  => 'category',
#   20  => 'supplemental category',     # in listdatasets (see below)
    22  => 'fixture identifier',
#   25  => 'keywords',                  # in listdatasets
    26  => 'content location code',
    27  => 'content location name',
    30  => 'release date',
    35  => 'release time',
    37  => 'expiration date',
    38  => 'expiration time',
    40  => 'special instructions',
    42  => 'action advised',
    45  => 'reference service',
    47  => 'reference date',
    50  => 'reference number',
    55  => 'date created',
    60  => 'time created',
    62  => 'digital creation date',
    63  => 'digital creation time',
    65  => 'originating program',
    70  => 'program version',
    75  => 'object cycle',
    80  => 'by-line',
    85  => 'by-line title',
    90  => 'city',
    92  => 'sub-location',
    95  => 'province/state',
    100 => 'country/primary location code',
    101 => 'country/primary location name',
    103 => 'original transmission reference',
    105 => 'headline',
    110 => 'credit',
    115 => 'source',
    116 => 'copyright notice',
#   118 => 'contact',                   # in listdatasets
    120 => 'caption/abstract',
    121 => 'local caption',
    122 => 'writer/editor',
#   125 => 'rasterized caption',        # unsupported (binary data)
    130 => 'image type',
    131 => 'image orientation',
    135 => 'language identifier',
    200 => 'custom1',  # These are NOT STANDARD, but are used by
    201 => 'custom2',  # Fotostation. Use at your own risk. They're
    202 => 'custom3',  # here in case you need to store some special
    203 => 'custom4',  # stuff, but note that other programs won't
    204 => 'custom5',  # recognize them and may blow them away if
    205 => 'custom6',  # you open and re-save the file. (Except with
    206 => 'custom7',  # Fotostation, of course.)
    207 => 'custom8',
    208 => 'custom9',
    209 => 'custom10',
    210 => 'custom11',
    211 => 'custom12',
    212 => 'custom13',
    213 => 'custom14',
    214 => 'custom15',
    215 => 'custom16',
    216 => 'custom17',
    217 => 'custom18',
    218 => 'custom19',
    219 => 'custom20',
    );

# this will get filled in if we save data back to file
%datanames = ();

%listdatasets = (
    20  => 'supplemental category',
    25  => 'keywords',
    118 => 'contact',
    );

# this will get filled in if we save data back to file
%listdatanames = ();

#######################################################################
# New, Save, Destroy, Error
#######################################################################

#
# new
#
# $info = new IPTCInfo('image filename goes here')
#
# Returns IPTCInfo object filled with metadata from the given image
# file. File on disk will be closed, and changes made to the IPTCInfo
# object will *not* be flushed back to disk.
#
# $file may be a file name or a seekable IO::Handle; a handle passed
# in by the caller is never closed here. Returns undef (see Error())
# when the file can't be opened, the handle isn't seekable, or no IPTC
# data is found and $force is not defined.
#
sub new
{
    my ($pkg, $file, $force) = @_;

    # eval guards the method call: $file may be a plain string
    my $input_is_handle = eval { $file->isa('IO::Handle') };
    if ($input_is_handle and not $file->isa('IO::Seekable')) {
        $error = "Handle must be seekable."; Log($error);
        return undef;
    }

    #
    # Open file and snarf data from it.
    #
    my $handle = $input_is_handle ? $file : IO::File->new($file);
    unless ($handle) {
        $error = "Can't open file: $!"; Log($error);
        return undef;
    }

    binmode($handle);

    my $datafound = ScanToFirstIMMTag($handle);
    unless ($datafound || defined($force)) {
        $error = "No IPTC data found."; Log($error);
        # don't close unless we opened it
        $handle->close() unless $input_is_handle;
        return undef;
    }

    my $self = bless
    {
        '_data'     => {},      # empty hashes; will be
        '_listdata' => {},      # filled in CollectIIMInfo
        '_handle'   => $handle,
    }, $pkg;

    $self->{_filename} = $file unless $input_is_handle;

    # Do the real snarfing here
    $self->CollectIIMInfo() if $datafound;

    $handle->close() unless $input_is_handle;

    return $self;
}

#
# create
#
# Like new, but forces an object to always be returned. This allows
# you to start adding stuff to files that don't have IPTC info and then
# save it.
#
sub create
{
    my ($pkg, $filename) = @_;

    return new($pkg, $filename, 'force');
}

#
# Save
#
# Saves JPEG with IPTC data back to the same file it came from.
#
sub Save
{
    my ($self, $options) = @_;

    return $self->SaveAs($self->{'_filename'}, $options);
}

#
# SaveAs
#
# Saves JPEG with IPTC data to a given file name. When the object was
# built from a handle rather than a file name, $newfile is ignored and
# the handle's contents are rewritten in place.
#
# Returns 1 on success, undef on failure (see Error()).
#
sub SaveAs
{
    my ($self, $newfile, $options) = @_;

    #
    # Open file and snarf data from it. Only a handle opened here is
    # ours to close; a caller-supplied handle is left open.
    #
    my $opened_here = $self->{_filename} ? 1 : 0;
    my $handle = $opened_here ? IO::File->new($self->{_filename})
                              : $self->{_handle};
    unless ($handle) {
        $error = "Can't open file: $!"; Log($error);
        return undef;
    }

    $handle->seek(0, 0);
    binmode($handle);

    unless (FileIsJPEG($handle)) {
        $error = "Source file is not a JPEG; I can only save JPEGs. Sorry.";
        Log($error);
        $handle->close() if $opened_here;
        return undef;
    }

    my $ret = JPEGCollectFileParts($handle, $options);

    # success is an array ref; any other value signals a parse failure
    unless (ref($ret)) {
        Log("collectfileparts failed");
        $handle->close() if $opened_here;
        return undef;
    }

    if ($opened_here) {
        $handle->close();
        unless ($handle = IO::File->new($newfile, ">")) {
            $error = "Can't open output file: $!"; Log($error);
            return undef;
        }
        binmode($handle);
    }
    else {
        unless ($handle->truncate(0)) {
            $error = "Can't truncate, handle might be read-only"; Log($error);
            return undef;
        }
        # truncate() leaves the file position where the read stopped
        # (old EOF). Rewind so the new image starts at offset 0.
        $handle->seek(0, 0);
    }

    my ($start, $end, $adobe) = @$ret;

    if (defined($options) && defined($options->{'discardAdobeParts'})) {
        undef $adobe;
    }

    $handle->print($start);
    $handle->print($self->PhotoshopIIMBlock($adobe, $self->PackedIIMData()));
    $handle->print($end);

    # Buffered write errors only surface at close.
    if ($opened_here && !$handle->close()) {
        $error = "Can't write output file: $!"; Log($error);
        return undef;
    }

    return 1;
}

#
# DESTROY
#
# Called when object is destroyed. No action necessary in this case.
#
sub DESTROY
{
    # no action necessary
}

#
# Error
#
# Returns description of the last error.
#
sub Error
{
    return $error;
}

#######################################################################
# Attributes for clients
#######################################################################

#
# Attribute/SetAttribute
#
# Returns/Changes value of a given data item.
#
sub Attribute
{
    my ($self, $attribute) = @_;

    return $self->{_data}->{$attribute};
}

sub SetAttribute
{
    my ($self, $attribute, $newval) = @_;

    $self->{_data}->{$attribute} = $newval;
}

# Drops every single-valued attribute; list data is left alone.
sub ClearAttributes
{
    my $self = shift;

    $self->{_data} = {};
}

# Drops both the single-valued attributes and the list data.
sub ClearAllData
{
    my $self = shift;

    $self->{_data} = {};
    $self->{_listdata} = {};
}

#
# Keywords/Clear/Add
#
# Returns reference to a list of keywords/clears the keywords
# list/adds a keyword.
#
sub Keywords
{
    my ($self) = @_;
    return $self->{_listdata}{'keywords'};
}

sub ClearKeywords
{
    my ($self) = @_;
    $self->{_listdata}{'keywords'} = undef;
}

sub AddKeyword
{
    my ($self, $add) = @_;

    $self->AddListData('keywords', $add);
}

#
# SupplementalCategories/Clear/Add
#
# Returns reference to a list of supplemental categories.
#
sub SupplementalCategories
{
    my ($self) = @_;
    return $self->{_listdata}{'supplemental category'};
}

sub ClearSupplementalCategories
{
    my ($self) = @_;
    $self->{_listdata}{'supplemental category'} = undef;
}

sub AddSupplementalCategories
{
    my ($self, $add) = @_;

    $self->AddListData('supplemental category', $add);
}

#
# Contacts/Clear/Add
#
# Returns reference to a list of contacts/clears the contacts
# list/adds a contact.
#
sub Contacts
{
    my ($self) = @_;
    return $self->{_listdata}{'contact'};
}

sub ClearContacts
{
    my ($self) = @_;
    $self->{_listdata}{'contact'} = undef;
}

sub AddContact
{
    my ($self, $add) = @_;

    $self->AddListData('contact', $add);
}

#
# AddListData
#
# Appends $add to the named list. $add is either one literal item or
# a reference to an array of items. The list is created on first use.
#
sub AddListData
{
    my ($self, $list, $add) = @_;

    # did user pass in a list ref? if so add its contents, otherwise
    # treat the argument as a single literal item
    my @items = (ref($add) eq 'ARRAY') ? @$add : ($add);

    push(@{$self->{_listdata}{$list}}, @items);
}

#######################################################################
# XML, SQL export
#######################################################################

#
# ExportXML
#
# $xml = $info->ExportXML('entity-name', \%extra-data,
#                         'optional output file name');
#
# Exports XML containing all image metadata. Attribute names are
# translated into XML tags, making adjustments to spaces and slashes
# for compatibility. (Spaces become underbars, slashes become dashes.)
# Caller provides an entity name; all data will be contained within
# this entity.
# Caller optionally provides a reference to a hash of
# extra data. This will be output into the XML, too. Keys must be
# valid XML tag names. Optionally provide a filename, and the XML
# will be dumped into there.
#
# Input:   $basetag  - name of the enclosing element ('photo' if
#                      undefined or empty)
#          $extraRef - optional hash ref of extra tag => text pairs
#          $filename - optional path that also receives the XML
# Returns: the XML text. If the output file can't be opened, the
#          problem is recorded for Error() and the XML is still
#          returned.
#
# NOTE: values are written verbatim, without entity escaping.
#
sub ExportXML
{
    my ($self, $basetag, $extraRef, $filename) = @_;

    $basetag = 'photo' unless (defined($basetag) && length($basetag));

    my $out = "<$basetag>\n";

    # dump extra info first, if any
    foreach my $key (keys %{ $extraRef || {} }) {
        $out .= "\t<$key>" . $extraRef->{$key} . "</$key>\n";
    }

    # dump our stuff
    foreach my $key (keys %{$self->{_data}}) {
        # spaces become underbars, slashes become dashes
        (my $cleankey = $key) =~ tr{ /}{_-};

        $out .= "\t<$cleankey>" . $self->{_data}->{$key} . "</$cleankey>\n";
    }

    # Each repeating dataset gets a wrapper element holding one child
    # element per item. A list that was never set is left out.
    my @lists = (
        ['keywords',                'keyword',
         scalar($self->Keywords())],
        ['supplemental_categories', 'supplemental_category',
         scalar($self->SupplementalCategories())],
        ['contacts',                'contact',
         scalar($self->Contacts())],
    );

    foreach my $spec (@lists) {
        my ($wrapper, $tag, $items) = @$spec;
        next unless defined($items);

        $out .= "\t<$wrapper>\n";
        foreach my $item (@$items) {
            $out .= "\t\t<$tag>$item</$tag>\n";
        }
        $out .= "\t</$wrapper>\n";
    }

    # close base tag
    $out .= "</$basetag>\n";

    # export to file if caller asked for it.
    if (defined($filename) && length($filename)) {
        if (open(my $xmlout, '>', $filename)) {
            print $xmlout $out;
            close($xmlout);
        }
        else {
            $error = "Can't open XML output file $filename: $!";
            Log($error);
        }
    }

    return $out;
}

#
# ExportSQL
#
# my %mappings = (
#   'IPTC dataset name here' => 'your table column name here',
#   'caption/abstract'       => 'caption',
#   'city'                   => 'city',
#   'province/state'         => 'state');  # etc etc etc.
#
# $statement = $info->ExportSQL('mytable', \%mappings, \%extra-data);
#
# Returns a SQL statement to insert into your given table name
# a set of values from the image.
# Caller passes in a reference to
# a hash which maps IPTC dataset names into column names for the
# database table. Optionally pass in a ref to a hash of extra data
# which will also be included in the insert statement. Keys in that
# hash must be valid column names.
#
# Returns undef when the table name or the mappings are missing.
# Values are single-quoted with embedded quotes doubled. An attribute
# the image does not carry is inserted as an empty string. Table and
# column names are used verbatim.
#
sub ExportSQL
{
    my ($self, $tablename, $mappingsRef, $extraRef) = @_;

    return undef unless (defined($tablename)   && length($tablename));
    return undef unless (defined($mappingsRef) && length($mappingsRef));

    # quote one value as a SQL string literal
    my $quote = sub {
        my $value = shift;
        $value = '' unless defined($value);
        $value =~ s/'/''/g;     # escape single quotes
        return "'$value'";
    };

    my (@columns, @values);

    # start with extra data, if any
    foreach my $column (keys %{ $extraRef || {} }) {
        push(@columns, $column);
        push(@values, $quote->($extraRef->{$column}));
    }

    # process our data
    foreach my $attribute (keys %$mappingsRef) {
        my $column = $mappingsRef->{$attribute};
        push(@columns, defined($column) ? $column : '');
        push(@values, $quote->(scalar($self->Attribute($attribute))));
    }

    my $columns = join(', ', @columns);
    my $values  = join(', ', @values);

    return "INSERT INTO $tablename ($columns) VALUES ($values)";
}

#######################################################################
# File parsing functions (private)
#######################################################################

#
# ScanToFirstIMMTag
#
# Scans to first IIM Record 2 tag in the file. This will either use
# smart scanning for JPEGs or blind scanning for other file types.
# Returns whatever the chosen scanner returns (false when nothing
# was found).
#
sub ScanToFirstIMMTag
{
    my $handle = shift @_;

    unless (FileIsJPEG($handle)) {
        Log("File not a JPEG, trying BlindScan");
        return BlindScan($handle);
    }

    Log("File is JPEG, proceeding with JPEGScan");
    return JPEGScan($handle);
}

#
# FileIsJPEG
#
# Checks to see if this file is a JPEG/JFIF or not. Will reset the
# file position back to 0 after it's done in either case.
#
# Input:   $handle - seekable handle on the candidate file.
# Returns: 1 if the data starts with SOI (ff d8) followed by another
#          0xff marker prefix, 0 otherwise.
#
sub FileIsJPEG
{
    my $handle = shift @_;

    # reset to beginning just in case
    $handle->seek(0, 0);

    if ($debugMode) {
        Log("Opening 16 bytes of file:\n");
        my $dump;
        $handle->read($dump, 16);
        HexDump($dump);
        $handle->seek(0, 0);
    }

    my $isjpeg = 0;

    # check start of file marker
    my ($ff, $soi);
    if ($handle->read($ff, 1)) {
        $handle->read($soi, 1);

        if (ord($ff) == 0xff && ord($soi) == 0xd8) {
            # now check for APP0 marker. I'll assume that anything with
            # a SOI followed by another marker is "close enough" for our
            # purposes. (We're not dinking with image data, so anything
            # following the JPEG tagging system should work.) The marker
            # id itself is read but deliberately not checked.
            my $markerid;
            $handle->read($ff, 1);
            $handle->read($markerid, 1);

            $isjpeg = 1 if (ord($ff) == 0xff);
        }
    }

    # reset to beginning of file
    $handle->seek(0, 0);
    return $isjpeg;
}

#
# JPEGScan
#
# Assuming the file is a JPEG (see above), this will scan through the
# markers looking for the APP13 marker, where IPTC/IIM data should be
# found. While this isn't a formally defined standard, all programs
# have (supposedly) adopted Adobe's technique of putting the data in
# APP13.
#
# Returns 0 when no APP13 segment is reached (the reason is recorded
# for Error()); otherwise the result of BlindScan over that segment.
#
sub JPEGScan
{
    my $handle = shift @_;

    # Skip past start of file marker
    my ($ff, $soi);
    $handle->read($ff, 1) || return 0;
    $handle->read($soi, 1);

    unless (ord($ff) == 0xff && ord($soi) == 0xd8) {
        $error = "JPEGScan: invalid start of file"; Log($error);
        return 0;
    }

    # Markers that end the search before any APP13 turned up.
    my %stop = (
        0x00 => "Marker scan failed",
        0xd9 => "Marker scan hit end of image marker",
        0xda => "Marker scan hit start of image data",
    );

    # Scan for the APP13 marker which will contain our IPTC info (I hope).
    my $marker = JPEGNextMarker($handle);

    while (ord($marker) != 0xed) {
        if (exists($stop{ ord($marker) })) {
            $error = $stop{ ord($marker) }; Log($error);
            return 0;
        }

        if (JPEGSkipVariable($handle) == 0) {
            $error = "JPEGSkipVariable failed"; Log($error);
            return 0;
        }

        $marker = JPEGNextMarker($handle);
    }

    # If we're here, we must have found the right marker. Now
    # BlindScan through the data.
    return BlindScan($handle, JPEGGetVariableLength($handle));
}

#
# JPEGNextMarker
#
# Scans to the start of the next valid-looking marker. Return value is
# the marker id as a one-byte string, or '' when the data runs out.
# ord('') is 0, so callers can detect failure with ord($marker) == 0.
#
sub JPEGNextMarker
{
    my $handle = shift @_;

    my $byte;

    # Find 0xff byte. We should already be on it.
    $handle->read($byte, 1) || return '';
    while (ord($byte) != 0xff) {
        Log("JPEGNextMarker: warning: bogus stuff in JPEG file");
        $handle->read($byte, 1) || return '';
    }

    # Now skip any extra 0xffs, which are valid padding.
    do {
        $handle->read($byte, 1) || return '';
    } while (ord($byte) == 0xff);

    # $byte should now contain the marker id.
    Log("JPEGNextMarker: at marker " . unpack("H*", $byte));
    return $byte;
}

#
# JPEGGetVariableLength
#
# Gets length of current variable-length section. File position at
# start must be on the marker itself, e.g. immediately after call to
# JPEGNextMarker. File position is updated to just past the length
# field.
#
# Returns the payload length (segment length minus the two length
# bytes). Returns 0 when the length can't be read or is invalid, so
# this sub alone cannot tell an empty segment from a failure.
#
sub JPEGGetVariableLength
{
    my $handle = shift @_;

    my $length = _JPEGReadVariableLength($handle);
    return defined($length) ? $length : 0;
}

#
# _JPEGReadVariableLength
#
# Private worker for the two subs around it: reads the 2-byte segment
# length and returns the payload length, or undef on a short read or
# an impossible length.
#
sub _JPEGReadVariableLength
{
    my $handle = shift @_;

    # Get the marker parameter length count
    my $raw;
    my $got = $handle->read($raw, 2);
    return undef unless ($got && $got == 2);

    my ($length) = unpack("n", $raw);

    Log("JPEG variable length: $length");

    # Length includes itself, so must be at least 2
    if ($length < 2) {
        Log("JPEGGetVariableLength: erroneous JPEG marker length");
        return undef;
    }

    return $length - 2;
}

#
# JPEGSkipVariable
#
# Skips variable-length section of JPEG block. Should always be called
# between calls to JPEGNextMarker to ensure JPEGNextMarker is at the
# start of data it can properly parse.
#
# Input:   $handle - positioned right after a marker id
#          $rSave  - optional scalar ref that receives the payload
# Returns: 1 on success (an empty segment counts as success),
#          0 on failure.
#
sub JPEGSkipVariable
{
    my $handle = shift;
    my $rSave  = shift;

    my $length = _JPEGReadVariableLength($handle);
    return 0 unless defined($length);

    # Skip remaining bytes. An empty segment has nothing to skip.
    my $temp = '';
    if ($length > 0) {
        if (defined($rSave) || $debugMode) {
            unless ($handle->read($temp, $length)) {
                Log("JPEGSkipVariable: read failed while skipping var data");
                return 0;
            }

            # prints out a heck of a lot of stuff
            # HexDump($temp);
        }
        else {
            # Just seek
            unless ($handle->seek($length, 1)) {
                Log("JPEGSkipVariable: read failed while skipping var data");
                return 0;
            }
        }
    }

    $$rSave = $temp if defined($rSave);

    return 1;
}

#
# BlindScan
#
# Scans blindly to first IIM Record 2 tag in the file. This method may
# or may not work on any arbitrary file type, but it doesn't hurt to
# check. We expect to see this tag within the first 8k of data. (This
# limit may need to be changed or eliminated depending on how other
# programs choose to store IIM.)
-# -sub BlindScan -{ - my $handle = shift; - my $maxoff = shift() || $MAX_FILE_OFFSET; - - Log("BlindScan: starting scan, max length $maxoff"); - - # start digging - my $offset = 0; - while ($offset <= $maxoff) - { - my $temp; - - unless ($handle->read($temp, 1)) - { - Log("BlindScan: hit EOF while scanning"); - return 0; - } - - # look for tag identifier 0x1c - if (ord($temp) == 0x1c) - { - # if we found that, look for record 2, dataset 0 - # (record version number) - my ($record, $dataset); - $handle->read($record, 1); - $handle->read($dataset, 1); - - if (ord($record) == 2) - { - # found it. seek to start of this tag and return. - Log("BlindScan: found IIM start at offset $offset"); - $handle->seek(-3, 1); # seek rel to current position - return $offset; - } - else - { - # didn't find it. back up 2 to make up for - # those reads above. - $handle->seek(-2, 1); # seek rel to current position - } - } - - # no tag, keep scanning - $offset++; - } - - return 0; -} - -# -# CollectIIMInfo -# -# Assuming file is seeked to start of IIM data (using above), this -# reads all the data into our object's hashes -# -sub CollectIIMInfo -{ - my $self = shift; - - my $handle = $self->{_handle}; - - # NOTE: file should already be at the start of the first - # IPTC code: record 2, dataset 0. - - while (1) - { - my $header; - return unless $handle->read($header, 5); - - ($tag, $record, $dataset, $length) = unpack("CCCn", $header); - - # bail if we're past end of IIM record 2 data - return unless ($tag == 0x1c) && ($record == 2); - - # print "tag : " . $tag . "\n"; - # print "record : " . $record . "\n"; - # print "dataset : " . $dataset . "\n"; - # print "length : " . $length . "\n"; - - my $value; - $handle->read($value, $length); - - # try to extract first into _listdata (keywords, categories) - # and, if unsuccessful, into _data. Tags which are not in the - # current IIM spec (version 4) are currently discarded. 
- if (exists $listdatasets{$dataset}) - { - my $dataname = $listdatasets{$dataset}; - my $listref = $listdata{$dataname}; - - push(@{$self->{_listdata}->{$dataname}}, $value); - } - elsif (exists $datasets{$dataset}) - { - my $dataname = $datasets{$dataset}; - - $self->{_data}->{$dataname} = $value; - } - # else discard - } -} - -####################################################################### -# File Saving -####################################################################### - -# -# JPEGCollectFileParts -# -# Collects all pieces of the file except for the IPTC info that we'll -# replace when saving. Returns the stuff before the info, stuff after, -# and the contents of the Adobe Resource Block that the IPTC data goes -# in. Returns undef if a file parsing error occured. -# -sub JPEGCollectFileParts -{ - my $handle = shift; - my ($options) = @_; - my ($start, $end, $adobeParts); - my $discardAppParts = 0; - - if (defined($options) && defined($options->{'discardAppParts'})) - { $discardAppParts = 1; } - - # Start at beginning of file - $handle->seek(0, 0); - - # Skip past start of file marker - my ($ff, $soi); - $handle->read($ff, 1) || return 0; - $handle->read($soi, 1); - - unless (ord($ff) == 0xff && ord($soi) == 0xd8) - { - $error = "JPEGScan: invalid start of file"; Log($error); - return 0; - } - - # - # Begin building start of file - # - $start .= pack("CC", 0xff, 0xd8); - - # Get first marker in file. This will be APP0 for JFIF or APP1 for - # EXIF. - my $marker = JPEGNextMarker($handle); - - my $app0data; - if (JPEGSkipVariable($handle, \$app0data) == 0) - { $error = "JPEGSkipVariable failed"; - Log($error); return 0; } - - if (ord($marker) == 0xe0 || !$discardAppParts) - { - # Always include APP0 marker at start if it's present. 
- $start .= pack("CC", 0xff, ord($marker)); - # Remember that the length must include itself (2 bytes) - $start .= pack("n", length($app0data) + 2); - $start .= $app0data; - } - else - { - # Manually insert APP0 if we're trashing application parts, since - # all JFIF format images should start with the version block. - $start .= pack("CC", 0xff, 0xe0); - $start .= pack("n", 16); # length (including these 2 bytes) - $start .= "JFIF"; # format - $start .= pack("CC", 1, 2); # call it version 1.2 (current JFIF) - $start .= pack(C8, 0); # zero everything else - } - - # - # Now scan through all markers in file until we hit image data or - # IPTC stuff. - # - $marker = JPEGNextMarker($handle); - - while (1) - { - if (ord($marker) == 0) - { $error = "Marker scan failed"; Log($error); return 0; } - - # Check for end of image - if (ord($marker) == 0xd9) - { - Log("JPEGCollectFileParts: saw end of image marker"); - $end .= pack("CC", 0xff, ord($marker)); - goto doneScanning; - } - - # Check for start of compressed data - if (ord($marker) == 0xda) - { - Log("JPEGCollectFileParts: saw start of compressed data"); - $end .= pack("CC", 0xff, ord($marker)); - goto doneScanning; - } - - my $partdata; - if (JPEGSkipVariable($handle, \$partdata) == 0) - { $error = "JPEGSkipVariable failed"; - Log($error); return 0; } - - # Take all parts aside from APP13, which we'll replace - # ourselves. 
- if ($discardAppParts && ord($marker) >= 0xe0 && ord($marker) <= 0xef) - { - # Skip all application markers, including Adobe parts - undef $adobeParts; - } - elsif (ord($marker) == 0xed) - { - # Collect the adobe stuff from part 13 - $adobeParts = CollectAdobeParts($partdata); - goto doneScanning; - } - else - { - # Append all other parts to start section - $start .= pack("CC", 0xff, ord($marker)); - $start .= pack("n", length($partdata) + 2); - $start .= $partdata; - } - - $marker = JPEGNextMarker($handle); - } - - doneScanning: - - # - # Append rest of file to $end - # - my $buffer; - - while ($handle->read($buffer, 16384)) - { - $end .= $buffer; - } - - return [$start, $end, $adobeParts]; -} - -# -# CollectAdobeParts -# -# Part APP13 contains yet another markup format, one defined by Adobe. -# See "File Formats Specification" in the Photoshop SDK (avail from -# www.adobe.com). We must take everything but the IPTC data so that -# way we can write the file back without losing everything else -# Photoshop stuffed into the APP13 block. 
-# -sub CollectAdobeParts -{ - my ($data) = @_; - my $length = length($data); - my $offset = 0; - my $out = ''; - - # Skip preamble - $offset = length('Photoshop 3.0 '); - - # Process everything - while ($offset < $length) - { - # Get OSType and ID - my ($ostype, $id1, $id2) = unpack("NCC", substr($data, $offset, 6)); - last unless (($offset += 6) < $length); # $offset += 6; - - # printf("CollectAdobeParts: ID %2.2x %2.2x\n", $id1, $id2); - - # Get pascal string - my ($stringlen) = unpack("C", substr($data, $offset, 1)); - last unless (++$offset < $length); # $offset += 1; - - # printf("CollectAdobeParts: str len %d\n", $stringlen); - - my $string = substr($data, $offset, $stringlen); - $offset += $stringlen; - # round up if odd - $offset++ if ($stringlen % 2 != 0); - # there should be a null if string len is 0 - $offset++ if ($stringlen == 0); - last unless ($offset < $length); - - # Get variable-size data - my ($size) = unpack("N", substr($data, $offset, 4)); - last unless (($offset += 4) < $length); # $offset += 4; - - # printf("CollectAdobeParts: size %d\n", $size); - - my $var = substr($data, $offset, $size); - $offset += $size; - $offset++ if ($size % 2 != 0); # round up if odd - - # skip IIM data (0x0404), but write everything else out - unless ($id1 == 4 && $id2 == 4) - { - $out .= pack("NCC", $ostype, $id1, $id2); - $out .= pack("C", $stringlen); - $out .= $string; - $out .= pack("C", 0) if ($stringlen == 0 || $stringlen % 2 != 0); - $out .= pack("N", $size); - $out .= $var; - $out .= pack("C", 0) if ($size % 2 != 0 && length($out) % 2 != 0); - } - } - - return $out; -} - -# -# PackedIIMData -# -# Assembles and returns our _data and _listdata into IIM format for -# embedding into an image. -# -sub PackedIIMData -{ - my $self = shift; - my $out; - - # First, we need to build a mapping of datanames to dataset - # numbers if we haven't already. 
- unless (scalar(keys %datanames)) - { - foreach my $dataset (keys %datasets) - { - my $dataname = $datasets{$dataset}; - $datanames{$dataname} = $dataset; - } - } - - # Ditto for the lists - unless (scalar(keys %listdatanames)) - { - foreach my $dataset (keys %listdatasets) - { - my $dataname = $listdatasets{$dataset}; - $listdatanames{$dataname} = $dataset; - } - } - - # Print record version - # tag - record - dataset - len (short) - 2 (short) - $out .= pack("CCCnn", 0x1c, 2, 0, 2, 2); - - # Iterate over data sets - foreach my $key (keys %{$self->{_data}}) - { - my $dataset = $datanames{$key}; - my $value = $self->{_data}->{$key}; - - if ($dataset == 0) - { Log("PackedIIMData: illegal dataname $key"); next; } - - next unless $value; - - my ($tag, $record) = (0x1c, 0x02); - - $out .= pack("CCCn", $tag, $record, $dataset, length($value)); - $out .= $value; - } - - # Do the same for list data sets - foreach my $key (keys %{$self->{_listdata}}) - { - my $dataset = $listdatanames{$key}; - - if ($dataset == 0) - { Log("PackedIIMData: illegal dataname $key"); next; } - - foreach my $value (@{$self->{_listdata}->{$key}}) - { - next unless $value; - - my ($tag, $record) = (0x1c, 0x02); - - $out .= pack("CCCn", $tag, $record, $dataset, length($value)); - $out .= $value; - } - } - - return $out; -} - -# -# PhotoshopIIMBlock -# -# Assembles the blob of Photoshop "resource data" that includes our -# fresh IIM data (from PackedIIMData) and the other Adobe parts we -# found in the file, if there were any. 
-# -sub PhotoshopIIMBlock -{ - my ($self, $otherparts, $data) = @_; - my $resourceBlock; - my $out; - - $resourceBlock .= "Photoshop 3.0"; - $resourceBlock .= pack("C", 0); - # Photoshop identifier - $resourceBlock .= "8BIM"; - # 0x0404 is IIM data, 00 is required empty string - $resourceBlock .= pack("CCCC", 0x04, 0x04, 0, 0); - # length of data as 32-bit, network-byte order - $resourceBlock .= pack("N", length($data)); - # Now tack data on there - $resourceBlock .= $data; - # Pad with a blank if not even size - $resourceBlock .= pack("C", 0) if (length($data) % 2 != 0); - # Finally tack on other data - $resourceBlock .= $otherparts if defined($otherparts); - - $out .= pack("CC", 0xff, 0xed); # JPEG start of block, APP13 - $out .= pack("n", length($resourceBlock) + 2); # length - $out .= $resourceBlock; - - return $out; -} - -####################################################################### -# Helpers, docs -####################################################################### - -# -# Log: just prints a message to STDERR if $debugMode is on. -# -sub Log -{ - if ($debugMode) - { my $message = shift; print STDERR "**IPTC** $message\n"; } -} - -# -# HexDump -# -# Very helpful when debugging. -# -sub HexDump -{ - my $dump = shift; - my $len = length($dump); - my $offset = 0; - my ($dcol1, $dcol2); - - while ($offset < $len) - { - my $temp = substr($dump, $offset++, 1); - - my $hex = unpack("H*", $temp); - $dcol1 .= " " . $hex; - if (ord($temp) >= 0x21 && ord($temp) <= 0x7e) - { $dcol2 .= " $temp"; } - else - { $dcol2 .= " ."; } - - if ($offset % 16 == 0) - { - print STDERR $dcol1 . " | " . $dcol2 . "\n"; - undef $dcol1; undef $dcol2; - } - } - - if (defined($dcol1) || defined($dcol2)) - { - print STDERR $dcol1 . " | " . $dcol2 . "\n"; - undef $dcol1; undef $dcol2; - } -} - -# -# JPEGDebugScan -# -# Also very helpful when debugging. 
-# -sub JPEGDebugScan -{ - my $filename = shift; - my $handle = IO::File->new($filename); - $handle or die "Can't open $filename: $!"; - - # Skip past start of file marker - my ($ff, $soi); - $handle->read($ff, 1) || return 0; - $handle->read($soi, 1); - - unless (ord($ff) == 0xff && ord($soi) == 0xd8) - { - Log("JPEGScan: invalid start of file"); - goto done; - } - - # scan to 0xDA (start of scan), dumping the markers we see between - # here and there. - my $marker = JPEGNextMarker($handle); - - while (ord($marker) != 0xda) - { - if (ord($marker) == 0) - { Log("Marker scan failed"); goto done; } - - if (ord($marker) == 0xd9) - {Log("Marker scan hit end of image marker"); goto done; } - - if (JPEGSkipVariable($handle) == 0) - { Log("JPEGSkipVariable failed"); return 0; } - - $marker = JPEGNextMarker($handle); - } - -done: - $handle->close(); -} - -# sucessful package load -1; - -__END__ - -=head1 NAME - -Image::IPTCInfo - Perl extension for extracting IPTC image meta-data - -=head1 SYNOPSIS - - use Image::IPTCInfo; - - # Create new info object - my $info = new Image::IPTCInfo('file-name-here.jpg'); - - # Check if file had IPTC data - unless (defined($info)) { die Image::IPTCInfo::Error(); } - - # Get list of keywords, supplemental categories, or contacts - my $keywordsRef = $info->Keywords(); - my $suppCatsRef = $info->SupplementalCategories(); - my $contactsRef = $info->Contacts(); - - # Get specific attributes... - my $caption = $info->Attribute('caption/abstract'); - - # Create object for file that may or may not have IPTC data. 
- $info = create Image::IPTCInfo('file-name-here.jpg'); - - # Add/change an attribute - $info->SetAttribute('caption/abstract', 'Witty caption here'); - - # Save new info to file - ##### See disclaimer in 'SAVING FILES' section ##### - $info->Save(); - $info->SaveAs('new-file-name.jpg'); - -=head1 DESCRIPTION - -Ever wish you add information to your photos like a caption, the place -you took it, the date, and perhaps even keywords and categories? You -already can. The International Press Telecommunications Council (IPTC) -defines a format for exchanging meta-information in news content, and -that includes photographs. You can embed all kinds of information in -your images. The trick is putting it to use. - -That's where this IPTCInfo Perl module comes into play. You can embed -information using many programs, including Adobe Photoshop, and -IPTCInfo will let your web server -- and other automated server -programs -- pull it back out. You can use the information directly in -Perl programs, export it to XML, or even export SQL statements ready -to be fed into a database. - -=head1 USING IPTCINFO - -Install the module as documented in the README file. You can try out -the demo program called "demo.pl" which extracts info from the images -in the "demo-images" directory. - -To integrate with your own code, simply do something like what's in -the synopsys above. - -The complete list of possible attributes is given below. These are as -specified in the IPTC IIM standard, version 4. Keywords and categories -are handled differently: since these are lists, the module allows you -to access them as Perl lists. Call Keywords() and Categories() to get -a reference to each list. - -=head2 NEW VS. CREATE - -You can either create an object using new() or create(): - - $info = new Image::IPTCInfo('file-name-here.jpg'); - $info = create Image::IPTCInfo('file-name-here.jpg'); - -new() will create a new object only if the file had IPTC data in it. 
-It will return undef otherwise, and you can check Error() to see what -the reason was. Using create(), on the other hand, always returns a -new IPTCInfo object if there was data or not. If there wasn't any IPTC -info there, calling Attribute() on anything will just return undef; -i.e. the info object will be more-or-less empty. - -If you're only reading IPTC data, call new(). If you want to add or -change info, call create(). Even if there's no useful stuff in the -info object, you can then start adding attributes and save the file. -That brings us to the next topic.... - -=head2 MODIFYING IPTC DATA - -You can modify IPTC data in JPEG files and save the file back to -disk. Here are the commands for doing so: - - # Set a given attribute - $info->SetAttribute('iptc attribute here', 'new value here'); - - # Clear the keywords or supp. categories list - $info->ClearKeywords(); - $info->ClearSupplementalCategories(); - $info->ClearContacts(); - - # Add keywords or supp. categories - $info->AddKeyword('frob'); - - # You can also add a list reference - $info->AddKeyword(['frob', 'nob', 'widget']); - -=head2 SAVING FILES - -With JPEG files you can add/change attributes, add keywords, etc., and -then call: - - $info->Save(); - $info->SaveAs('new-file-name.jpg'); - -This will save the file with the updated IPTC info. Please only run -this on *copies* of your images -- not your precious originals! -- -because I'm not liable for any corruption of your images. (If you read -software license agreements, nobody else is liable, either. Make -backups of your originals!) - -If you're into image wizardry, there are a couple handy options you -can use on saving. One feature is to trash the Adobe block of data, -which contains IPTC info, color settings, Photoshop print settings, -and stuff like that. The other is to trash all application blocks, -including stuff like EXIF and FlashPix data. This can be handy for -reducing file sizes. 
The options are passed as a hashref to Save() and -SaveAs(), e.g.: - - $info->Save({'discardAdobeParts' => 'on'}); - $info->SaveAs('new-file-name.jpg', {'discardAppParts' => 'on'}); - -Note that if there was IPTC info in the image, or you added some -yourself, the new image will have an Adobe part with only the IPTC -information. - -=head2 XML AND SQL EXPORT FEATURES - -IPTCInfo also allows you to easily generate XML and SQL from the image -metadata. For XML, call: - - $xml = $info->ExportXML('entity-name', \%extra-data, - 'optional output file name'); - -This returns XML containing all image metadata. Attribute names are -translated into XML tags, making adjustments to spaces and slashes for -compatibility. (Spaces become underbars, slashes become dashes.) You -provide an entity name; all data will be contained within this entity. -You can optionally provides a reference to a hash of extra data. This -will get put into the XML, too. (Example: you may want to put info on -the image's location into the XML.) Keys must be valid XML tag names. -You can also provide a filename, and the XML will be dumped into -there. See the "demo.pl" script for examples. - -For SQL, it goes like this: - - my %mappings = ( - 'IPTC dataset name here' => 'your table column name here', - 'caption/abstract' => 'caption', - 'city' => 'city', - 'province/state' => 'state); # etc etc etc. - - $statement = $info->ExportSQL('mytable', \%mappings, \%extra-data); - -This returns a SQL statement to insert into your given table name a -set of values from the image. You pass in a reference to a hash which -maps IPTC dataset names into column names for the database table. As -with XML export, you can also provide extra information to be stuck -into the SQL. 
- -=head1 IPTC ATTRIBUTE REFERENCE - - object name originating program - edit status program version - editorial update object cycle - urgency by-line - subject reference by-line title - category city - fixture identifier sub-location - content location code province/state - content location name country/primary location code - release date country/primary location name - release time original transmission reference - expiration date headline - expiration time credit - special instructions source - action advised copyright notice - reference service contact - reference date caption/abstract - reference number local caption - date created writer/editor - time created image type - digital creation date image orientation - digital creation time language identifier - - custom1 - custom20: NOT STANDARD but used by Fotostation. - IPTCInfo also supports these fields. - -=head1 KNOWN BUGS - -IPTC meta-info on MacOS may be stored in the resource fork instead -of the data fork. This program will currently not scan the resource -fork. - -I have heard that some programs will embed IPTC info at the end of the -file instead of the beginning. The module will currently only look -near the front of the file. If you have a file with IPTC data that -IPTCInfo can't find, please contact me! I would like to ensure -IPTCInfo works with everyone's files. - -=head1 AUTHOR - -Josh Carter, josh@multipart-mixed.com - -=head1 SEE ALSO - -perl(1). 
- -=cut diff --git a/IPTCInfo.pm b/IPTCInfo.pm deleted file mode 120000 index d16a118..0000000 --- a/IPTCInfo.pm +++ /dev/null @@ -1 +0,0 @@ -IPTCInfo-1.9.4.pm \ No newline at end of file diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..c9079b9 --- /dev/null +++ b/Makefile @@ -0,0 +1,8 @@ +help: ## Shows this help + @echo "$$(grep -h '#\{2\}' $(MAKEFILE_LIST) | sed 's/: #\{2\} / /' | column -t -s ' ')" + +tdd: ## Run tests with a watcher + ptw -- -sx + +test: ## Run test suite + pytest --cov diff --git a/Pipfile b/Pipfile new file mode 100644 index 0000000..8a7908a --- /dev/null +++ b/Pipfile @@ -0,0 +1,22 @@ +[[source]] + +url = "https://pypi.python.org/simple" +verify_ssl = true +name = "pypi" + + +[packages] + + + +[dev-packages] + +pytest = "*" +pytest-watch = "*" +pytest-cov = "*" +"flake8" = "*" + + +[requires] + +python_version = "3.6" diff --git a/Pipfile.lock b/Pipfile.lock new file mode 100644 index 0000000..9e31edb --- /dev/null +++ b/Pipfile.lock @@ -0,0 +1,205 @@ +{ + "_meta": { + "hash": { + "sha256": "ce01c5983f5682489422faecf731ec6511b7bf7911affc1d50ead8a5840d141b" + }, + "host-environment-markers": { + "implementation_name": "cpython", + "implementation_version": "3.6.4", + "os_name": "posix", + "platform_machine": "x86_64", + "platform_python_implementation": "CPython", + "platform_release": "16.7.0", + "platform_system": "Darwin", + "platform_version": "Darwin Kernel Version 16.7.0: Thu Jun 15 17:36:27 PDT 2017; root:xnu-3789.70.16~2/RELEASE_X86_64", + "python_full_version": "3.6.4", + "python_version": "3.6", + "sys_platform": "darwin" + }, + "pipfile-spec": 6, + "requires": { + "python_version": "3.6" + }, + "sources": [ + { + "name": "pypi", + "url": "https://pypi.python.org/simple", + "verify_ssl": true + } + ] + }, + "default": {}, + "develop": { + "argh": { + "hashes": [ + "sha256:a9b3aaa1904eeb78e32394cd46c6f37ac0fb4af6dc488daa58971bdc7d7fcaf3", + "sha256:e9535b8c84dc9571a48999094fda7f33e63c3f1b74f3e5f3ac0105a58405bb65" 
+ ], + "version": "==0.26.2" + }, + "attrs": { + "hashes": [ + "sha256:a17a9573a6f475c99b551c0e0a812707ddda1ec9653bed04c13841404ed6f450", + "sha256:1c7960ccfd6a005cd9f7ba884e6316b5e430a3f1a6c37c5f87d8b43f83b54ec9" + ], + "version": "==17.4.0" + }, + "colorama": { + "hashes": [ + "sha256:463f8483208e921368c9f306094eb6f725c6ca42b0f97e313cb5d5512459feda", + "sha256:48eb22f4f8461b1df5734a074b57042430fb06e1d61bd1e11b078c0fe6d7a1f1" + ], + "version": "==0.3.9" + }, + "coverage": { + "hashes": [ + "sha256:d1ee76f560c3c3e8faada866a07a32485445e16ed2206ac8378bd90dadffb9f0", + "sha256:007eeef7e23f9473622f7d94a3e029a45d55a92a1f083f0f3512f5ab9a669b05", + "sha256:17307429935f96c986a1b1674f78079528833410750321d22b5fb35d1883828e", + "sha256:845fddf89dca1e94abe168760a38271abfc2e31863fbb4ada7f9a99337d7c3dc", + "sha256:3f4d0b3403d3e110d2588c275540649b1841725f5a11a7162620224155d00ba2", + "sha256:4c4f368ffe1c2e7602359c2c50233269f3abe1c48ca6b288dcd0fb1d1c679733", + "sha256:f8c55dd0f56d3d618dfacf129e010cbe5d5f94b6951c1b2f13ab1a2f79c284da", + "sha256:cdd92dd9471e624cd1d8c1a2703d25f114b59b736b0f1f659a98414e535ffb3d", + "sha256:2ad357d12971e77360034c1596011a03f50c0f9e1ecd12e081342b8d1aee2236", + "sha256:e9a0e1caed2a52f15c96507ab78a48f346c05681a49c5b003172f8073da6aa6b", + "sha256:eea9135432428d3ca7ee9be86af27cb8e56243f73764a9b6c3e0bda1394916be", + "sha256:700d7579995044dc724847560b78ac786f0ca292867447afda7727a6fbaa082e", + "sha256:66f393e10dd866be267deb3feca39babba08ae13763e0fc7a1063cbe1f8e49f6", + "sha256:5ff16548492e8a12e65ff3d55857ccd818584ed587a6c2898a9ebbe09a880674", + "sha256:d00e29b78ff610d300b2c37049a41234d48ea4f2d2581759ebcf67caaf731c31", + "sha256:87d942863fe74b1c3be83a045996addf1639218c2cb89c5da18c06c0fe3917ea", + "sha256:358d635b1fc22a425444d52f26287ae5aea9e96e254ff3c59c407426f44574f4", + "sha256:81912cfe276e0069dca99e1e4e6be7b06b5fc8342641c6b472cb2fed7de7ae18", + "sha256:079248312838c4c8f3494934ab7382a42d42d5f365f0cf7516f938dbb3f53f3f", + 
"sha256:b0059630ca5c6b297690a6bf57bf2fdac1395c24b7935fd73ee64190276b743b", + "sha256:493082f104b5ca920e97a485913de254cbe351900deed72d4264571c73464cd0", + "sha256:e3ba9b14607c23623cf38f90b23f5bed4a3be87cbfa96e2e9f4eabb975d1e98b", + "sha256:82cbd3317320aa63c65555aa4894bf33a13fb3a77f079059eb5935eea415938d", + "sha256:9721f1b7275d3112dc7ccf63f0553c769f09b5c25a26ee45872c7f5c09edf6c1", + "sha256:bd4800e32b4c8d99c3a2c943f1ac430cbf80658d884123d19639bcde90dad44a", + "sha256:f29841e865590af72c4b90d7b5b8e93fd560f5dea436c1d5ee8053788f9285de", + "sha256:f3a5c6d054c531536a83521c00e5d4004f1e126e2e2556ce399bef4180fbe540", + "sha256:dd707a21332615108b736ef0b8513d3edaf12d2a7d5fc26cd04a169a8ae9b526", + "sha256:2e1a5c6adebb93c3b175103c2f855eda957283c10cf937d791d81bef8872d6ca", + "sha256:f87f522bde5540d8a4b11df80058281ac38c44b13ce29ced1e294963dd51a8f8", + "sha256:a7cfaebd8f24c2b537fa6a271229b051cdac9c1734bb6f939ccfc7c055689baa", + "sha256:309d91bd7a35063ec7a0e4d75645488bfab3f0b66373e7722f23da7f5b0f34cc", + "sha256:0388c12539372bb92d6dde68b4627f0300d948965bbb7fc104924d715fdc0965", + "sha256:ab3508df9a92c1d3362343d235420d08e2662969b83134f8a97dc1451cbe5e84", + "sha256:43a155eb76025c61fc20c3d03b89ca28efa6f5be572ab6110b2fb68eda96bfea", + "sha256:f98b461cb59f117887aa634a66022c0bd394278245ed51189f63a036516e32de", + "sha256:b6cebae1502ce5b87d7c6f532fa90ab345cfbda62b95aeea4e431e164d498a3d", + "sha256:a4497faa4f1c0fc365ba05eaecfb6b5d24e3c8c72e95938f9524e29dadb15e76", + "sha256:2b4d7f03a8a6632598cbc5df15bbca9f778c43db7cf1a838f4fa2c8599a8691a", + "sha256:1afccd7e27cac1b9617be8c769f6d8a6d363699c9b86820f40c74cfb3328921c" + ], + "version": "==4.4.2" + }, + "docopt": { + "hashes": [ + "sha256:49b3a825280bd66b3aa83585ef59c4a8c82f2c8a522dbe754a8bc8d08c85c491" + ], + "version": "==0.6.2" + }, + "flake8": { + "hashes": [ + "sha256:c7841163e2b576d435799169b78703ad6ac1bbb0f199994fc05f700b2a90ea37", + "sha256:7253265f7abd8b313e3892944044a365e3f4ac3fcdcfb4298f55ee9ddf188ba0" + ], + "version": "==3.5.0" + }, + 
"mccabe": { + "hashes": [ + "sha256:ab8a6258860da4b6677da4bd2fe5dc2c659cff31b3ee4f7f5d64e79735b80d42", + "sha256:dd8d182285a0fe56bace7f45b5e7d1a6ebcbf524e8f3bd87eb0f125271b8831f" + ], + "version": "==0.6.1" + }, + "pathtools": { + "hashes": [ + "sha256:7c35c5421a39bb82e58018febd90e3b6e5db34c5443aaaf742b3f33d4655f1c0" + ], + "version": "==0.1.2" + }, + "pluggy": { + "hashes": [ + "sha256:7f8ae7f5bdf75671a718d2daf0a64b7885f74510bcd98b1a0bb420eb9a9d0cff" + ], + "version": "==0.6.0" + }, + "py": { + "hashes": [ + "sha256:8cca5c229d225f8c1e3085be4fcf306090b00850fefad892f9d96c7b6e2f310f", + "sha256:ca18943e28235417756316bfada6cd96b23ce60dd532642690dcfdaba988a76d" + ], + "version": "==1.5.2" + }, + "pycodestyle": { + "hashes": [ + "sha256:6c4245ade1edfad79c3446fadfc96b0de2759662dc29d07d80a6f27ad1ca6ba9", + "sha256:682256a5b318149ca0d2a9185d365d8864a768a28db66a84a2ea946bcc426766" + ], + "version": "==2.3.1" + }, + "pyflakes": { + "hashes": [ + "sha256:08bd6a50edf8cffa9fa09a463063c425ecaaf10d1eb0335a7e8b1401aef89e6f", + "sha256:8d616a382f243dbf19b54743f280b80198be0bca3a5396f1d2e1fca6223e8805" + ], + "version": "==1.6.0" + }, + "pytest": { + "hashes": [ + "sha256:b84878865558194630c6147f44bdaef27222a9f153bbd4a08908b16bf285e0b1", + "sha256:53548280ede7818f4dc2ad96608b9f08ae2cc2ca3874f2ceb6f97e3583f25bc4" + ], + "version": "==3.3.2" + }, + "pytest-cov": { + "hashes": [ + "sha256:890fe5565400902b0c78b5357004aab1c814115894f4f21370e2433256a3eeec", + "sha256:03aa752cf11db41d281ea1d807d954c4eda35cfa1b21d6971966cc041bbf6e2d" + ], + "version": "==2.5.1" + }, + "pytest-watch": { + "hashes": [ + "sha256:29941f6ff74e6d85cc0796434a5cbc27ebe51e91ed24fd0757fad5cc6fd3d491" + ], + "version": "==4.1.0" + }, + "pyyaml": { + "hashes": [ + "sha256:3262c96a1ca437e7e4763e2843746588a965426550f3797a79fca9c6199c431f", + "sha256:16b20e970597e051997d90dc2cddc713a2876c47e3d92d59ee198700c5427736", + "sha256:e863072cdf4c72eebf179342c94e6989c67185842d9997960b3e69290b2fa269", + 
"sha256:bc6bced57f826ca7cb5125a10b23fd0f2fff3b7c4701d64c439a300ce665fff8", + "sha256:c01b880ec30b5a6e6aa67b09a2fe3fb30473008c85cd6a67359a1b15ed6d83a4", + "sha256:827dc04b8fa7d07c44de11fabbc888e627fa8293b695e0f99cb544fdfa1bf0d1", + "sha256:592766c6303207a20efc445587778322d7f73b161bd994f227adaa341ba212ab", + "sha256:5f84523c076ad14ff5e6c037fe1c89a7f73a3e04cf0377cb4d017014976433f3", + "sha256:0c507b7f74b3d2dd4d1322ec8a94794927305ab4cebbe89cc47fe5e81541e6e8", + "sha256:b4c423ab23291d3945ac61346feeb9a0dc4184999ede5e7c43e1ffb975130ae6", + "sha256:ca233c64c6e40eaa6c66ef97058cdc80e8d0157a443655baa1b2966e812807ca", + "sha256:4474f8ea030b5127225b8894d626bb66c01cda098d47a2b0d3429b6700af9fd8", + "sha256:326420cbb492172dec84b0f65c80942de6cedb5233c413dd824483989c000608", + "sha256:5ac82e411044fb129bae5cfbeb3ba626acb2af31a8d17d175004b70862a741a7" + ], + "version": "==3.12" + }, + "six": { + "hashes": [ + "sha256:832dc0e10feb1aa2c68dcc57dbb658f1c7e65b9b61af69048abc87a2db00a0eb", + "sha256:70e8a77beed4562e7f14fe23a786b54f6296e34344c23bc42f07b15018ff98e9" + ], + "version": "==1.11.0" + }, + "watchdog": { + "hashes": [ + "sha256:7e65882adb7746039b6f3876ee174952f8eaaa34491ba34333ddf1fe35de4162" + ], + "version": "==0.8.3" + } + } +} diff --git a/README.md b/README.md index 658b802..d1bb75c 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,7 @@ # IPTCINFO 3 + +[![Build Status](https://travis-ci.org/crccheck/iptcinfo3.svg?branch=master)](https://travis-ci.org/crccheck/iptcinfo3) + ### Like IPTCInfo but finally compatible for Python 3 IPTCInfo: extract and modify IPTC (metadata) information on images - port of IPTCInfo.pm by Josh Carter ' @@ -19,105 +22,31 @@ programs -- pull it back out. You can use the information directly in Python programs, export it to XML, or even export SQL statements ready to be fed into a database. 
-1.9.5-8: https://bitbucket.org/gthomas/iptcinfo/issue/4/file-permissions-for-changed-files-are-not - copy original file's permission bits on save/saveAs - -1.9.5-7: https://bitbucket.org/gthomas/iptcinfo/issue/3/images-w-o-iptc-data-should-not-log-errors - have silencable parse errors. - -1.9.5-6: to have a nice new upload (seems easy_install grabs an old version). - -1.9.5-5: fix some issues with "super" - -1.9.5-3: use logging module. - -1.9.5-2: Emil Stenström pinpointed some bugs/misleading (un)comments - Also a new (mis)feature is implemented: if you don't specify inp_charset - (and the image misses such information, too) than no conversion is made - to unicode, everything stays bytestring! - This way you don't need to deal with charsets, BUT it is your risk to make - the modifications with the SAME charset as it is in the image! - -1.9.5-1: getting in sync with the Perl version 1.9.5 - -1.9.2-rc8: - charset recognition loosened (failed with some image out of - Adobe Lightroom). - -1.9.2-rc7: NOT READY - IPTCInfo now accepts 'inp_charset' keyword for setting input charset. - -1.9.2-rc6: just PyLint-ed out some errors. - -1.9.2-rc5: Amos Latteier sent me a patch which releases the requirement of the - file objects to be file objects (he uses this on jpeg files stored in - databases as strings). - It modifies the module in order to look for a read method on the file - object. If one exists it assumes the argument is a file object, otherwise it - assumes it's a filename. +Usage +----- -1.9.2-rc4: on Windows systems, tmpfile may not work correctly - now I use - cStringIO on file save (to save the file without truncating it on Exception). + from iptcinfo3 import IPTCInfo -1.9.2-rc3: some little bug fixes, some safety enhancements (now iptcinfo.py - will overwrite the original image file (info.save()) only if everything goes - fine (so if an exception is thrown at writing, it won't cut your original - file). 
- - This is a pre-release version: needs some testing, and has an unfound bug - (yet): some pictures can be enhanced with iptc data, and iptcinfo.py is able - to read them, but some other iptc data readers will spit on it. - -1.9.1: a first release with some little encoding support - - The class IPTCInfo now has an inp_charset and an out_charset attribute - the - first is the read image's charset (defaults to the system default charset), - the second is the charset the writer will use (defaults to inp_charset). - - Reader will find the charset included in IPTC data (if any, defaults to the - system's default charset), and use it to read to unicode strings. Writer will - write using IPTCinfo.out_charset (if it is not set, will not write charset - IPTC record). - - With this, it is possible to read and write i18n strings correctly. - - I haven't tested this functionality thoroughly, and that little test was only - on my WinXP box only, with the only other IPTC reader: IrfanView. - - -SYNOPSIS - - from iptcinfo import IPTCInfo - import sys - - fn = (len(sys.argv) > 1 and [sys.argv[1]] or ['test.jpg'])[0] - fn2 = (len(sys.argv) > 2 and [sys.argv[2]] or ['test_out.jpg'])[0] # Create new info object - info = IPTCInfo(fn) - - # Check if file had IPTC data - if len(info.data) < 4: raise Exception(info.error) + info = IPTCInfo('doge.jpg') # Print list of keywords, supplemental categories, contacts - print info.keywords - print info.supplementalCategories - print info.contacts + print(info['keywords']) + print(info['supplementalCategories']) + print(info['contacts']) # Get specific attributes... - caption = info.data['caption/abstract'] + caption = info['caption/abstract'] - # Create object for file that may does have IPTC data. 
- # info = IPTCInfo(fn) - # for files without IPTC data, use - info = IPTCInfo(fn, force=True) + # Create object for file that may not have IPTC data + info = IPTCInfo('such_iptc.jpg', force=True) # Add/change an attribute - info.data['caption/abstract'] = 'Witty caption here' - info.data['supplemental category'] = ['portrait'] + info['caption/abstract'] = 'Witty caption here' + info['supplemental category'] = ['portrait'] # Save new info to file ##### See disclaimer in 'SAVING FILES' section ##### info.save() - info.saveAs(fn2) - - #re-read IPTC info - print IPTCInfo(fn2) + info.save_as('very_meta.jpg') diff --git a/break.py b/break.py deleted file mode 100755 index eb8f92f..0000000 --- a/break.py +++ /dev/null @@ -1,22 +0,0 @@ -#!/usr/bin/env python - -import sys, os -import iptcinfo - -iptcinfo.debugMode = 4 - -IPTCInfo = iptcinfo.IPTCInfo - -fn = (len(sys.argv) > 0 and [sys.argv[1]] or ['test.jpg'])[0] -fn2 = '.'.join(fn.split('.')[:-1]) + '_o.jpg' -info = IPTCInfo(fn, force=True) -print(info) -info.data['urgency'] = 'GT' -info.keywords += ['ize'] -print(info) -#info2.data[field] = "" -#print info2 -info.saveAs(fn2) -info = IPTCInfo(fn2) -print(info) - diff --git a/fixtures/Lenna.jpg b/fixtures/Lenna.jpg new file mode 100644 index 0000000..f7bd210 Binary files /dev/null and b/fixtures/Lenna.jpg differ diff --git a/fixtures/instagram.jpg b/fixtures/instagram.jpg new file mode 100644 index 0000000..41cfdc8 Binary files /dev/null and b/fixtures/instagram.jpg differ diff --git a/iptcinfo3.py b/iptcinfo3.py index 984e239..a08397f 100644 --- a/iptcinfo3.py +++ b/iptcinfo3.py @@ -16,280 +16,426 @@ # VERSION = '1.9'; """ IPTCInfo - Python module for extracting and modifying IPTC image meta-data +""" +import contextlib +import logging +import os +import shutil +import sys +import tempfile +from struct import pack, unpack -Ported from Josh Carter's Perl IPTCInfo-1.9.pm by Tamás Gulácsi - -Ever wish you add information to your photos like a caption, the place -you took 
it, the date, and perhaps even keywords and categories? You -already can. The International Press Telecommunications Council (IPTC) -defines a format for exchanging meta-information in news content, and -that includes photographs. You can embed all kinds of information in -your images. The trick is putting it to use. - -That's where this IPTCInfo Python module comes into play. You can embed -information using many programs, including Adobe Photoshop, and -IPTCInfo will let your web server -- and other automated server -programs -- pull it back out. You can use the information directly in -Python programs, export it to XML, or even export SQL statements ready -to be fed into a database. - - -PREFACE - -First, I want to apologize a little bit: as this module is originally -written in Perl by Josh Carter, it is quite non-Pythonic (for example -the addKeyword, clearSupplementalCategories functions - I think it -would be better having a derived list class with add, clear functions) -and tested only by me reading/writing IPTC metadata for family photos. -Any suggestions welcomed! 
- -Thanks, -Tamás Gulácsi - -SYNOPSIS - - from iptcinfo import IPTCInfo - import sys +__version__ = '1.9.5-8' +__author__ = 'Gulácsi, Tamás' - fn = (len(sys.argv) > 1 and [sys.argv[1]] or ['test.jpg'])[0] - fn2 = (len(sys.argv) > 2 and [sys.argv[2]] or ['test_out.jpg'])[0] +SURELY_WRITE_CHARSET_INFO = False +debugMode = 0 +# Debug off for production use - # Create new info object - info = IPTCInfo(fn) - # for file without IPTC data use - info = IPTCInfo(fn, force=True) +logger = logging.getLogger('iptcinfo') +LOGDBG = logging.getLogger('iptcinfo.debug') - # Check if file had IPTC data - if len(info.data) < 4: raise Exception(info.error) +SOI = 0xd8 # Start of image +APP0 = 0xe0 # JFIF +APP1 = 0xe1 # Exif +APP13 = 0xed # Photoshop3 IPTC +COM = 0xfe # Comment +SOS = 0xda # Start of scan +EOI = 0xd9 # End of image - # Print list of keywords, supplemental categories, or contacts - print info.keywords - print info.supplementalCategories - print info.contacts +# Misc utilities +################ - # Get specific attributes... - caption = info.data['caption/abstract'] +@contextlib.contextmanager +def smart_open(path, *args, **kwargs): + """ + Lets you treat a file handle as if it were a file path. - # Create object for file that may or may not have IPTC data.
- info = IPTCInfo(fn) + Based on https://stackoverflow.com/a/17603000/8049516 + """ + if hasattr(path, 'read'): + fh = path + else: + fh = open(path, *args, **kwargs) - # Add/change an attribute - info.data['caption/abstract'] = 'Witty caption here' - info.data['supplemental category'] = ['portrait'] + try: + yield fh + finally: + fh.close() - # Save new info to file - ##### See disclaimer in 'SAVING FILES' section ##### - info.save() - info.saveAs(fn2) - #re-read IPTC info - print IPTCInfo(fn2) +def duck_typed(obj, prefs): + if isinstance(prefs, str): + prefs = [prefs] + for pref in prefs: + if not hasattr(obj, pref): + return False -DESCRIPTION + return True - USING IPTCINFO - To integrate with your own code, simply do something like what's in - the synopsys above. +def ord3(x): + return x if isinstance(x, int) else ord(x) - The complete list of possible attributes is given below. These are - as specified in the IPTC IIM standard, version 4. Keywords and - categories are handled slightly differently: since these are lists, - the module allows you to access them as Python lists. Call - keywords() and supplementalCategories() to get each list. - IMAGES NOT HAVING IPTC METADATA +def hex_dump(dump): + """ + Create an xxd style hex dump from a binary dump. + """ + length = len(dump) + P = lambda z: chr(z) if ord3(z) >= 0x21 and ord3(z) <= 0x7e else '.' # noqa: E731 + ROWLEN = 18 + res = ['\n'] + for j in range(length // ROWLEN + int(length % ROWLEN > 0)): + row = dump[j * ROWLEN:(j + 1) * ROWLEN] + if isinstance(row, list): + row = b''.join(row) + res.append( + ('%02X ' * len(row) + ' ' * (ROWLEN - len(row)) + '| %s\n') % + tuple(list(row) + [''.join(map(P, row))])) + return ''.join(res) + + +# File utilities +################ +# Should we just use .read and .seek? 
- If yout apply +class EOFException(Exception): + def __init__(self, *args): + super().__init__(self) + self._str = '\n'.join(args) - info = IPTCInfo('file-name-here.jpg') + def __str__(self): + return self._str - to an image not having IPTC metadata, len(info.data) will be 3 - ('supplemental categories', 'keywords', 'contacts') will be empty - lists. - MODIFYING IPTC DATA +def read_exactly(fh, length): + """ + Reads exactly `length` bytes and throws an exception if EOF is hit. + """ + buf = fh.read(length) + if buf is None or len(buf) < length: + raise EOFException('read_exactly: %s' % str(fh)) - You can modify IPTC data in JPEG files and save the file back to - disk. Here are the commands for doing so: + return buf - # Set a given attribute - info.data['iptc attribute here'] = 'new value here' - # Clear the keywords or supp. categories list - info.keywords = [] - info.supplementalCategories = [] - info.contacts = [] +def seek_exactly(fh, length): + """ + Seeks length bytes from the current position and checks the result + """ + pos = fh.tell() + fh.seek(length, 1) + if fh.tell() - pos != length: + raise EOFException('seek_exactly') - # Add keywords or supp. categories - info.keyword.append('frob') - # You can also add a list reference - info.keyword.extend(['frob', 'nob', 'widget']) - info.keyword += ['gadget'] +# JPEG utilities +################ - SAVING FILES +def file_is_jpeg(fh): + """ + Checks to see if this file is a Jpeg/JFIF or not. - With JPEG files you can add/change attributes, add keywords, etc., and - then call: + Will reset the file position back to 0 after it's done in either case. + """ + fh.seek(0) + if debugMode: # pragma: no cover + logger.info("Opening 16 bytes of file: %r", hex_dump(fh.read(16))) + fh.seek(0) - info.save() - info.saveAs('new-file-name.jpg') + ered = False + try: + (ff, soi) = fh.read(2) + if not (ff == 0xff and soi == SOI): + ered = False + else: + # now check for APP0 marker. 
I'll assume that anything with a + # SOI followed by APP0 is "close enough" for our purposes. + # (We're not dinking with image data, so anything following + # the Jpeg tagging system should work.) + (ff, app0) = fh.read(2) + ered = ff == 0xff + finally: + fh.seek(0) + return ered + + +def jpeg_get_variable_length(fh): + """Gets length of current variable-length section. File position + at start must be on the marker itself, e.g. immediately after call + to JPEGNextMarker. File position is updated to just past the + length field.""" + try: + length = unpack('!H', read_exactly(fh, 2))[0] + except EOFException: + return 0 + logger.debug('JPEG variable length: %d', length) + + # Length includes itself, so must be at least 2 + if length < 2: + logger.warn("jpeg_get_variable_length: erroneous JPEG marker length") + return 0 + return length - 2 + + +def jpeg_next_marker(fh): + """Scans to the start of the next valid-looking marker. Return + value is the marker id. + + TODO use fh.read instead of read_exactly + """ + # Find 0xff byte. We should already be on it. + try: + byte = read_exactly(fh, 1) + while ord3(byte) != 0xff: + # logger.warn("jpeg_next_marker: bogus stuff in Jpeg file at: ') + byte = read_exactly(fh, 1) - This will save the file with the updated IPTC info. Please only run - this on *copies* of your images -- not your precious originals! -- - because I'm not liable for any corruption of your images. (If you - read software license agreements, nobody else is liable, - either. Make backups of your originals!) + # Now skip any extra 0xffs, which are valid padding. + while True: + byte = read_exactly(fh, 1) + if ord3(byte) != 0xff: + break - If you're into image wizardry, there are a couple handy options you - can use on saving. One feature is to trash the Adobe block of data, - which contains IPTC info, color settings, Photoshop print settings, - and stuff like that. The other is to trash all application blocks, - including stuff like EXIF and FlashPix data. 
This can be handy for - reducing file sizes. The options are passed as a dict to save() - and saveAs(), e.g.: + except EOFException: + return None - info.save({'discardAdobeParts': 'on'}) - info.saveAs('new-file-name.jpg', {'discardAppParts': 'on'}) + # byte should now contain the marker id. + logger.debug("jpeg_next_marker: at marker %02X (%d)", ord3(byte), ord3(byte)) + return byte - Note that if there was IPTC info in the image, or you added some - yourself, the new image will have an Adobe part with only the IPTC - information. - XML AND SQL EXPORT FEATURES +def jpeg_skip_variable(fh, rSave=None): + """Skips variable-length section of Jpeg block. Should always be + called between calls to JpegNextMarker to ensure JpegNextMarker is + at the start of data it can properly parse.""" - IPTCInfo also allows you to easily generate XML and SQL from the image - metadata. For XML, call: + # Get the marker parameter length count + length = jpeg_get_variable_length(fh) + if length == 0: + return None - xml = info.exportXML('entity-name', extra-data, - 'optional output file name') + # Skip remaining bytes + if rSave is not None or debugMode > 0: + try: + temp = read_exactly(fh, length) + except EOFException: + logger.error("jpeg_skip_variable: read failed while skipping var data") + return None + else: + # Just seek + try: + seek_exactly(fh, length) + except EOFException: + logger.error("jpeg_skip_variable: read failed while skipping var data") + return None - This returns XML containing all image metadata. Attribute names are - translated into XML tags, making adjustments to spaces and slashes - for compatibility. (Spaces become underbars, slashes become dashes.) - You provide an entity name; all data will be contained within this - entity. You can optionally provides a reference to a hash of extra - data. This will get put into the XML, too. (Example: you may want to - put info on the image's location into the XML.) Keys must be valid - XML tag names. 
You can also provide a filename, and the XML will be - dumped into there. - - For SQL, it goes like this: + return (rSave is not None and [temp] or [True])[0] - my mappings = { - 'IPTC dataset name here': 'your table column name here', - 'caption/abstract': 'caption', - 'city': 'city', - 'province/state': 'state} # etc etc etc. - - statement = info.exportSQL('mytable', mappings, extra-data) - This returns a SQL statement to insert into your given table name a - set of values from the image. You pass in a reference to a hash - which maps IPTC dataset names into column names for the database - table. As with XML export, you can also provide extra information to - be stuck into the SQL. - -IPTC ATTRIBUTE REFERENCE - - object name originating program - edit status program version - editorial update object cycle - urgency by-line - subject reference by-line title - category city - fixture identifier sub-location - content location code province/state - content location name country/primary location code - release date country/primary location name - release time original transmission reference - expiration date headline - expiration time credit - special instructions source - action advised copyright notice - reference service contact - reference date caption/abstract - reference number writer/editor - date created image type - time created image orientation - digital creation date language identifier - digital creation time - - custom1 - custom20: NOT STANDARD but used by Fotostation. - IPTCInfo also supports these fields. - -KNOWN BUGS - -IPTC meta-info on MacOS may be stored in the resource fork instead -of the data fork. This program will currently not scan the resource -fork. - -I have heard that some programs will embed IPTC info at the end of the -file instead of the beginning. The module will currently only look -near the front of the file. If you have a file with IPTC data that -IPTCInfo can't find, please contact me! 
I would like to ensure -IPTCInfo works with everyone's files. - -AUTHOR - -Josh Carter, josh@multipart-mixed.com -""" +def jpeg_collect_file_parts(fh, discard_app_parts=False): + """ + Collect all pieces of the file except for the IPTC info that we'll replace when saving. -__version__ = '1.9.5-8' -__author__ = 'Gulácsi, Tamás' + Returns: + start: the stuff before the info + end: the stuff after the info + adobe: the contents of the Adobe Resource Block that the IPTC data goes in -SURELY_WRITE_CHARSET_INFO = False + Returns None if a file parsing error occurred. + """ + adobeParts = b'' + start = [] + fh.seek(0) + (ff, soi) = fh.read(2) + if not (ord3(ff) == 0xff and ord3(soi) == SOI): + raise Exception('invalid start of file, is it a Jpeg?') + + # Begin building start of file + start.append(pack('BB', 0xff, SOI)) # pack('BB', ff, soi) + + # Get first marker. This *should* be APP0 for JFIF or APP1 for EXIF + marker = ord(jpeg_next_marker(fh)) + while marker != APP0 and marker != APP1: + # print('bad first marker: %02X, skipping it' % marker) + marker = ord(jpeg_next_marker(fh)) + + if marker is None: + break + + # print('first marker: %02X %02X' % (marker, APP0)) + app0data = b'' + app0data = jpeg_skip_variable(fh, app0data) + if app0data is None: + raise Exception('jpeg_skip_variable failed') + + if marker == APP0 or not discard_app_parts: + # Always include APP0 marker at start if it's present. + start.append(pack('BB', 0xff, marker)) + # Remember that the length must include itself (2 bytes) + start.append(pack('!H', len(app0data) + 2)) + start.append(app0data) + else: + # Manually insert APP0 if we're trashing application parts, since + # all JFIF format images should start with the version block.
+ LOGDBG.debug('discard_app_parts=%s', discard_app_parts) + start.append(pack("BB", 0xff, APP0)) + start.append(pack("!H", 16)) # length (including these 2 bytes) + start.append(b'JFIF') # format + start.append(pack("BB", 1, 2)) # call it version 1.2 (current JFIF) + start.append(pack('8B', 0, 0, 0, 0, 0, 0, 0, 0)) # zero everything else + + # Now scan through all markers in file until we hit image data or + # IPTC stuff. + end = [] + while True: + marker = jpeg_next_marker(fh) + if marker is None or ord3(marker) == 0: + raise Exception('Marker scan failed') + + # Check for end of image + elif ord3(marker) == EOI: + logger.debug("jpeg_collect_file_parts: saw end of image marker") + end.append(pack("BB", 0xff, ord3(marker))) + break + + # Check for start of compressed data + elif ord3(marker) == SOS: + logger.debug("jpeg_collect_file_parts: saw start of compressed data") + end.append(pack("BB", 0xff, ord3(marker))) + break + + partdata = b'' + partdata = jpeg_skip_variable(fh, partdata) + if not partdata: + raise Exception('jpeg_skip_variable failed') + + partdata = bytes(partdata) + + # Take all parts aside from APP13, which we'll replace ourselves. 
+ if discard_app_parts and ord3(marker) >= APP0 and ord3(marker) <= 0xef: + # Skip all application markers, including Adobe parts + adobeParts = b'' + elif ord3(marker) == 0xed: + # Collect the adobe stuff from part 13 + adobeParts = collect_adobe_parts(partdata) + break -from struct import pack, unpack -#~ from cStringIO import StringIO -import sys -import re -import os -import tempfile -import shutil + else: + # Append all other parts to start section + start.append(pack("BB", 0xff, ord3(marker))) + start.append(pack("!H", len(partdata) + 2)) + start.append(partdata) -import logging -LOG = logging.getLogger('iptcinfo') -LOGDBG = logging.getLogger('iptcinfo.debug') + # Append rest of file to end + while True: + buff = fh.read(8192) + if buff is None or len(buff) == 0: + break + end.append(buff) -class String(str): - def __iadd__(self, other): - assert isinstance(other, str) - super(type(self), self).__iadd__(other) + return (b''.join(start), b''.join(end), adobeParts) -class EOFException(Exception): - def __init__(self, *args): - Exception.__init__(self) - self._str = '\n'.join(args) +def jpeg_debug_scan(filename): # pragma: no cover + """Also very helpful when debugging.""" + assert isinstance(filename, str) and os.path.isfile(filename) + with open(filename, 'wb') as fh: - def __str__(self): - return self._str + # Skip past start of file marker + (ff, soi) = fh.read(2) + if not (ord3(ff) == 0xff and ord3(soi) == SOI): + logger.error("jpeg_debug_scan: invalid start of file") + else: + # scan to 0xDA (start of scan), dumping the markers we see between + # here and there. 
+ while True: + marker = jpeg_next_marker(fh) + if ord3(marker) == 0xda: + break + if ord3(marker) == 0: + logger.warn("Marker scan failed") + break -def push(diction, key, value): - if key in diction and hasattr(diction[key], 'append'): - diction[key].append(value) - else: - diction[key] = value + elif ord3(marker) == 0xd9: + logger.debug("Marker scan hit end of image marker") + break + if not jpeg_skip_variable(fh): + logger.warn("jpeg_skip_variable failed") + return None -def duck_typed(obj, prefs): - if isinstance(prefs, str): - prefs = [prefs] - for pref in prefs: - if not hasattr(obj, pref): - return False - return True -sys_enc = sys.getfilesystemencoding() +def collect_adobe_parts(data): + """Part APP13 contains yet another markup format, one defined by + Adobe. See "File Formats Specification" in the Photoshop SDK + (avail from www.adobe.com). We must take + everything but the IPTC data so that way we can write the file back + without losing everything else Photoshop stuffed into the APP13 + block.""" + assert isinstance(data, bytes) + length = len(data) + offset = 0 + out = [] + # Skip preamble + offset = len('Photoshop 3.0 ') + # Process everything + while offset < length: + # Get OSType and ID + (ostype, id1, id2) = unpack("!LBB", data[offset:offset + 6]) + offset += 6 + if offset >= length: + break + + # Get pascal string + stringlen = unpack("B", data[offset:offset + 1])[0] + offset += 1 + if offset >= length: + break + + string = data[offset:offset + stringlen] + offset += stringlen + + # round up if odd + if (stringlen % 2 != 0): + offset += 1 + # there should be a null if string len is 0 + if stringlen == 0: + offset += 1 + if offset >= length: + break + + # Get variable-size data + size = unpack("!L", data[offset:offset + 4])[0] + offset += 4 + if offset >= length: + break + + var = data[offset:offset + size] + offset += size + if size % 2 != 0: + offset += 1 # round up if odd + + # skip IIM data (0x0404), but write everything else out + if not
(id1 == 4 and id2 == 4): + out.append(pack("!LBB", ostype, id1, id2)) + out.append(pack("B", stringlen)) + out.append(string) + if stringlen == 0 or stringlen % 2 != 0: + out.append(pack("B", 0)) + out.append(pack("!L", size)) + out.append(var) + out = [''.join(out)] + if size % 2 != 0 and len(out[0]) % 2 != 0: + out.append(pack("B", 0)) + + return b''.join(out) -# Debug off for production use -debugMode = 0 ##################################### # These names match the codes defined in ITPC's IIM record 2. @@ -340,7 +486,7 @@ def duck_typed(obj, prefs): 120: 'caption/abstract', 121: 'local caption', 122: 'writer/editor', -# 125: 'rasterized caption', # unsupported (binary data) + # 125: 'rasterized caption', # unsupported (binary data) 130: 'image type', 131: 'image orientation', 135: 'language identifier', @@ -366,34 +512,37 @@ def duck_typed(obj, prefs): 219: 'custom20', } -c_datasets_r = dict([(v, k) for k, v in c_datasets.items()]) -# del k, v +c_datasets_r = {v: k for k, v in c_datasets.items()} + +c_charset = {100: 'iso8859_1', 101: 'iso8859_2', 109: 'iso8859_3', + 110: 'iso8859_4', 111: 'iso8859_5', 125: 'iso8859_7', + 127: 'iso8859_6', 138: 'iso8859_8', + 196: 'utf_8'} +c_charset_r = {v: k for k, v in c_charset.items()} class IPTCData(dict): """Dict with int/string keys from c_listdatanames""" def __init__(self, diction={}, *args, **kwds): - dict.__init__(self, *args, **kwds) - self.update(dict((self.keyAsInt(k), v) - for k, v in list(diction.items()))) + super().__init__(self, *args, **kwds) + self.update({self._key_as_int(k): v for k, v in diction.items()}) c_cust_pre = 'nonstandard_' @classmethod - def keyAsInt(cls, key): - #~ global c_datasets_r + def _key_as_int(cls, key): if isinstance(key, int): return key - elif key in c_datasets_r: - return c_datasets_r[key] - elif (key.startswith(cls.c_cust_pre) - and key[len(cls.c_cust_pre):].isdigit()): + elif isinstance(key, str) and key.lower() in c_datasets_r: + return c_datasets_r[key.lower()] + elif 
key.startswith(cls.c_cust_pre) and key[len(cls.c_cust_pre):].isdigit(): + # example: nonstandard_69 -> 69 return int(key[len(cls.c_cust_pre):]) else: - raise KeyError("Key %s is not in %s!" % (key, list(c_datasets_r.keys()))) + raise KeyError('Key %s is not in %s!' % (key, c_datasets_r.keys())) @classmethod - def keyAsStr(cls, key): + def _key_as_str(cls, key): if isinstance(key, str) and key in c_datasets_r: return key elif key in c_datasets: @@ -404,40 +553,23 @@ def keyAsStr(cls, key): raise KeyError("Key %s is not in %s!" % (key, list(c_datasets.keys()))) def __getitem__(self, name): - return dict.get(self, self.keyAsInt(name), None) + return self.get(self._key_as_int(name), None) def __setitem__(self, name, value): - key = self.keyAsInt(name) - if key in self and isinstance(dict.__getitem__(self, key), - (tuple, list)): - #print key, c_datasets[key], o.__getitem__(key) + key = self._key_as_int(name) + if key in self and isinstance(super().__getitem__(key), (tuple, list)): if isinstance(value, (tuple, list)): dict.__setitem__(self, key, value) else: - raise ValueError("For %s only lists acceptable!" % name) + raise ValueError("%s must be iterable" % name) else: - dict.__setitem__(self, self.keyAsInt(name), value) + dict.__setitem__(self, key, value) - -def _getSetSomeList(name): - def getList(self): - """Returns the list of %s.""" % name - return self._data[name] - - def setList(self, value): - """Sets the list of %s.""" % name - if isinstance(value, (list, tuple)): - self._data[name] = list(value) - elif isinstance(value, str): - self._data[name] = [value] - LOG.warn('Warning: IPTCInfo.%s is a list!', name) - else: - raise ValueError('IPTCInfo.%s is a list!' % name) - - return (getList, setList) + def __str__(self): + return str({self._key_as_str(k): v for k, v in self.items()}) -class IPTCInfo(object): +class IPTCInfo: """info = IPTCInfo('image filename goes here') File can be a file-like object or a string. 
If it is a string, it is @@ -456,110 +588,78 @@ class IPTCInfo(object): be VERY careful to use bytestrings overall with the SAME ENCODING! """ - def __init__(self, fobj, force=False, inp_charset=None, out_charset=None, - *args, **kwds): - # Open file and snarf data from it. - self._error = None - self._data = IPTCData({'supplemental category': [], 'keywords': [], - 'contact': []}) - if duck_typed(fobj, 'read'): + error = None + + def __init__(self, fobj, force=False, inp_charset=None, out_charset=None): + self._data = IPTCData({ + 'supplemental category': [], + 'keywords': [], + 'contact': [], + }) + self._fobj = fobj + if duck_typed(fobj, 'read'): # DELETEME self._filename = None - self._fh = fobj else: self._filename = fobj - fh = self._getfh() self.inp_charset = inp_charset self.out_charset = out_charset or inp_charset - datafound = self.scanToFirstIMMTag(fh) - if datafound or force: - # Do the real snarfing here - if datafound: - self.collectIIMInfo(fh) - else: - LOG.warn("No IPTC data found.") - self._closefh(fh) - # raise Exception("No IPTC data found.") - self._closefh(fh) - - def _closefh(self, fh): - if fh and self._filename is not None: - fh.close() - - def _getfh(self, mode='r'): - assert self._filename is not None or self._fh is not None - if self._filename is not None: - fh = open(self._filename, (mode + 'b').replace('bb', 'b')) - if not fh: - LOG.error("Can't open file (%r)", self._filename) - return None + with smart_open(self._fobj, 'rb') as fh: + datafound = self.scanToFirstIMMTag(fh) + if datafound or force: + # Do the real snarfing here + if datafound: + self.collectIIMInfo(fh) else: - return fh - else: - return self._fh - - ####################################################################### - # New, Save, Destroy, Error - ####################################################################### - - def get_error(self): - """Returns the last error message""" - return self._error + logger.warn('No IPTC data found in %s', fobj) - def 
set_error(self, obj): - '''Sets the last error message''' - self._error = obj - error = property(get_error, set_error) + def _filepos(self, fh): + """For debugging, return what position in the file we are.""" + fh.flush() + return fh.tell() def save(self, options=None): """Saves Jpeg with IPTC data back to the same file it came from.""" + # TODO handle case when file handle is passed in assert self._filename is not None - return self.saveAs(self._filename, options) - - def _filepos(self, fh): - fh.flush() - #~ return 'POS=%d\n' % fh.tell() - return fh.tell() + return self.save_as(self._filename, options) - def saveAs(self, newfile, options=None): + def save_as(self, newfile, options=None): """Saves Jpeg with IPTC data to a given file name.""" - # Open file and snarf data from it. - fh = self._getfh() - assert fh - fh.seek(0, 0) - if not self.fileIsJpeg(fh): - LOG.error("Source file is not a Jpeg; I can only save Jpegs." - " Sorry.") - return None - ret = self.jpegCollectFileParts(fh, options) - self._closefh(fh) - if ret is None: - LOG.error("collectfileparts failed") - raise Exception('collectfileparts failed') - - (start, end, adobe) = ret - LOGDBG.debug('start: %d, end: %d, adobe:%d', *list(map(len, ret))) - self.hexDump(start), len(end) + with smart_open(self._fobj, 'rb') as fh: + if not file_is_jpeg(fh): + logger.error('Source file %s is not a Jpeg.' 
% self._fob) + return None + + jpeg_parts = jpeg_collect_file_parts(fh) + + if jpeg_parts is None: + raise Exception('jpeg_collect_file_parts failed: %s' % self.error) + + (start, end, adobe) = jpeg_parts + LOGDBG.debug('start: %d, end: %d, adobe: %d', *map(len, jpeg_parts)) + hex_dump(start) LOGDBG.debug('adobe1: %r', adobe) if options is not None and 'discardAdobeParts' in options: adobe = None - LOGDBG.debug('adobe2: %r', adobe) + LOGDBG.debug('adobe2: %r', adobe) LOGDBG.info('writing...') (tmpfd, tmpfn) = tempfile.mkstemp() if self._filename and os.path.exists(self._filename): shutil.copystat(self._filename, tmpfn) - #os.close(tmpfd) tmpfh = os.fdopen(tmpfd, 'wb') - #tmpfh = open(tmpfn, 'wb') if not tmpfh: - LOG.error("Can't open output file %r", tmpfn) + logger.error("Can't open output file %r", tmpfn) return None + LOGDBG.debug('start=%d end=%d', len(start), len(end)) + LOGDBG.debug('start len=%d dmp=%s', len(start), hex_dump(start)) + # FIXME `start` contains the old IPTC data, so the next we read, we'll get the wrong data tmpfh.write(start) # character set - ch = self.c_charset_r.get(self.out_charset, None) + ch = c_charset_r.get(self.out_charset, None) # writing the character set is not the best practice # - couldn't find the needed place (record) for it yet! 
if SURELY_WRITE_CHARSET_INFO and ch is not None: @@ -567,15 +667,13 @@ def saveAs(self, newfile, options=None): LOGDBG.debug('pos: %d', self._filepos(tmpfh)) data = self.photoshopIIMBlock(adobe, self.packedIIMData()) - LOGDBG.debug('data len=%d dmp=%r', len(data), self.hexDump(data)) + LOGDBG.debug('data len=%d dmp=%s', len(data), hex_dump(data)) tmpfh.write(data) LOGDBG.debug('pos: %d', self._filepos(tmpfh)) tmpfh.write(end) LOGDBG.debug('pos: %d', self._filepos(tmpfh)) tmpfh.flush() - #print tmpfh, tmpfn, newfile - #copy the successfully written file back to the given file if hasattr(tmpfh, 'getvalue'): # StringIO fh2 = open(newfile, 'wb') fh2.truncate() @@ -597,400 +695,123 @@ def __del__(self): No action necessary in this case.""" pass - ####################################################################### - # Attributes for clients - ####################################################################### - - def getData(self): - return self._data + def __len__(self): + return len(self._data) - def setData(self, _): - raise Exception('You cannot overwrite the data, only its elements!') - data = property(getData, setData) + def __getitem__(self, key): + return self._data[key] - keywords = property(*_getSetSomeList('keywords')) - supplementalCategories = property( - *_getSetSomeList('supplemental category')) - contacts = property(*_getSetSomeList('contact')) + def __setitem__(self, key, value): + self._data[key] = value def __str__(self): - return ('charset: %s\n%s' % (self.inp_charset, - str(dict((self._data.keyAsStr(k), v) - for k, v in list(self._data.items()))))) - - def readExactly(self, fh, length): - """readExactly - - Reads exactly length bytes and throws an exception if EOF is hitten - before. 
- """ - ## assert isinstance(fh, file) - assert duck_typed(fh, 'read') # duck typing - buf = fh.read(length) - if buf is None or len(buf) < length: - raise EOFException('readExactly: %s' % str(fh)) - return buf - - def seekExactly(self, fh, length): - """seekExactly - - Seeks length bytes from the current position and checks the result - """ - ## assert isinstance(fh, file) - assert duck_typed(fh, ['seek', 'tell']) # duck typing - pos = fh.tell() - fh.seek(length, 1) - if fh.tell() - pos != length: - raise EOFException() - - ####################################################################### - # XML, SQL export - ####################################################################### - - def exportXML(self, basetag, extra, filename): - """xml = info.exportXML('entity-name', extra-data, - 'optional output file name') - - Exports XML containing all image metadata. Attribute names are - translated into XML tags, making adjustments to spaces and slashes - for compatibility. (Spaces become underbars, slashes become - dashes.) Caller provides an entity name; all data will be - contained within this entity. Caller optionally provides a - reference to a hash of extra data. This will be output into the - XML, too. Keys must be valid XML tag names. 
Optionally provide a - filename, and the XML will be dumped into there.""" - - def P(s): - #global off - return ' ' * off + s + '\n' - off = 0 - - if len(basetag) == 0: - basetag = 'photo' - out = [P("<%s>" % basetag)] - - off += 1 - # dump extra info first, if any - for k, v in list((isinstance(extra, dict) - and [extra] or [{}])[0].items()): - out.append(P("<%s>%s" % (k, v, k))) - - # dump our stuff - for k, v in list(self._data.items()): - if not isinstance(v, list): - key = re.sub('/', '-', - re.sub(' +', ' ', self._data.keyAsStr(k))) - out.append(P("<%s>%s" % (key, v, key))) - - # print keywords - kw = self.keywords() - if kw and len(kw) > 0: - out.append(P("")) - off += 1 - for k in kw: - out.append(P("%s" % k)) - off -= 1 - out.append(P("")) - - # print supplemental categories - sc = self.supplementalCategories() - if sc and len(sc) > 0: - out.append(P("")) - off += 1 - for k in sc: - out.append( - P("%s" % k)) - off -= 1 - out.append(P("")) - - # print contacts - kw = self.contacts() - if kw and len(kw) > 0: - out.append(P("")) - off += 1 - for k in kw: - out.append(P("%s" % k)) - off -= 1 - out.append(P("")) - - # close base tag - off -= 1 - out.append(P("" % basetag)) - - # export to file if caller asked for it. - if len(filename) > 0: - xmlout = file(filename, 'wb') - xmlout.write(out) - xmlout.close() - - return ''.join(out) - - def exportSQL(self, tablename, mappings, extra): - """statement = info.exportSQL('mytable', mappings, extra-data) - - mappings = { - 'IPTC dataset name here': 'your table column name here', - 'caption/abstract': 'caption', - 'city': 'city', - 'province/state': 'state} # etc etc etc. - - Returns a SQL statement to insert into your given table name a set - of values from the image. Caller passes in a reference to a hash - which maps IPTC dataset names into column names for the database - table. Optionally pass in a ref to a hash of extra data which will - also be included in the insert statement. 
Keys in that hash must - be valid column names.""" - - if (tablename is None or mappings is None): - return None - statement = columns = values = None - - E = lambda s: "'%s'" % re.sub("'", "''", s) # escape single quotes - - # start with extra data, if any - columns = ', '.join(list(extra.keys()) + list(mappings.keys())) - values = ', '.join(map(E, list(extra.values()) - + [self.data[k] for k in list(mappings.keys())])) - # process our data - - statement = "INSERT INTO %s (%s) VALUES (%s)" \ - % (tablename, columns, values) - - return statement - - ####################################################################### - # File parsing functions (private) - ####################################################################### + return 'charset:\t%s\ndata:\t%s' % (self.inp_charset, self._data) - def scanToFirstIMMTag(self, fh): # OK + def scanToFirstIMMTag(self, fh): """Scans to first IIM Record 2 tag in the file. The will either use smart scanning for Jpegs or blind scanning for other file types.""" - ## assert isinstance(fh, file) - if self.fileIsJpeg(fh): - LOG.info("File is Jpeg, proceeding with JpegScan") + if file_is_jpeg(fh): + logger.info("File is JPEG, proceeding with JpegScan") return self.jpegScan(fh) else: - LOG.warn("File not a JPEG, trying blindScan") + logger.warn("File not a JPEG, trying blindScan") return self.blindScan(fh) - def fileIsJpeg(self, fh): # OK - """Checks to see if this file is a Jpeg/JFIF or not. Will reset - the file position back to 0 after it's done in either case.""" - - # reset to beginning just in case - ## assert isinstance(fh, file) - assert duck_typed(fh, ['read', 'seek']) - fh.seek(0, 0) - if debugMode > 0: - LOG.info("Opening 16 bytes of file: %r", - self.hexDump(fh.read(16))) - fh.seek(0, 0) - # check start of file marker - ered = False - try: - (ff, soi) = fh.read(2) - if not (ord(ff) == 0xff and ord(soi) == 0xd8): - ered = False - else: - # now check for APP0 marker. 
I'll assume that anything with a - # SOI followed by APP0 is "close enough" for our purposes. - # (We're not dinking with image data, so anything following - # the Jpeg tagging system should work.) - (ff, app0) = fh.read(2) - if not (ord(ff) == 0xff): - ered = False - else: - ered = True - finally: - # reset to beginning of file - fh.seek(0, 0) - return ered - c_marker_err = {0: "Marker scan failed", - 0xd9: "Marker scan hit end of image marker", + 0xd9: "Marker scan hit EOI (end of image) marker", 0xda: "Marker scan hit start of image data"} - def jpegScan(self, fh): # OK + def jpegScan(self, fh): """Assuming the file is a Jpeg (see above), this will scan through the markers looking for the APP13 marker, where IPTC/IIM data should be found. While this isn't a formally defined standard, all programs have (supposedly) adopted Adobe's technique of putting the data in APP13.""" # Skip past start of file marker - ## assert isinstance(fh, file) try: - (ff, soi) = self.readExactly(fh, 2) + (ff, soi) = read_exactly(fh, 2) except EOFException: return None - if not (ord(ff) == 0xff and ord(soi) == 0xd8): + if not (ord3(ff) == 0xff and ord3(soi) == SOI): self.error = "JpegScan: invalid start of file" - LOG.error(self.error) + logger.error(self.error) return None + # Scan for the APP13 marker which will contain our IPTC info (I hope). - while 1: + while True: err = None - marker = self.jpegNextMarker(fh) - if ord(marker) == 0xed: + marker = jpeg_next_marker(fh) + if ord3(marker) == 0xed: break # 237 - err = self.c_marker_err.get(ord(marker), None) - if err is None and self.jpegSkipVariable(fh) == 0: - err = "JpegSkipVariable failed" + err = self.c_marker_err.get(ord3(marker), None) + if err is None and jpeg_skip_variable(fh) == 0: + err = "jpeg_skip_variable failed" if err is not None: self.error = err - LOG.warn(err) - return None - - # If were's here, we must have found the right marker. Now - # blindScan through the data. 
- return self.blindScan(fh, MAX=self.jpegGetVariableLength(fh)) - - def jpegNextMarker(self, fh): # OK - """Scans to the start of the next valid-looking marker. Return - value is the marker id.""" - - ## assert isinstance(fh, file) - # Find 0xff byte. We should already be on it. - try: - byte = self.readExactly(fh, 1) - except EOFException: - return None - - while ord(byte) != 0xff: - LOG.warn("JpegNextMarker: warning: bogus stuff in Jpeg file") - try: - byte = self.readExactly(fh, 1) - except EOFException: - return None - # Now skip any extra 0xffs, which are valid padding. - while 1: - try: - byte = self.readExactly(fh, 1) - except EOFException: - return None - if ord(byte) != 0xff: - break - - # byte should now contain the marker id. - LOG.debug("JpegNextMarker: at marker %02X (%d)", ord(byte), ord(byte)) - return byte - - def jpegGetVariableLength(self, fh): # OK - """Gets length of current variable-length section. File position - at start must be on the marker itself, e.g. immediately after call - to JPEGNextMarker. File position is updated to just past the - length field.""" - ## assert isinstance(fh, file) - try: - length = unpack('!H', self.readExactly(fh, 2))[0] - except EOFException: - return 0 - LOG.debug('JPEG variable length: %d', length) - - # Length includes itself, so must be at least 2 - if length < 2: - LOG.warn("JPEGGetVariableLength: erroneous JPEG marker length") - return 0 - return length - 2 - - def jpegSkipVariable(self, fh, rSave=None): # OK - """Skips variable-length section of Jpeg block. 
Should always be - called between calls to JpegNextMarker to ensure JpegNextMarker is - at the start of data it can properly parse.""" - - ## assert isinstance(fh, file) - # Get the marker parameter length count - length = self.jpegGetVariableLength(fh) - if length == 0: - return None - - # Skip remaining bytes - if rSave is not None or debugMode > 0: - try: - temp = self.readExactly(fh, length) - except EOFException: - LOG.error("JpegSkipVariable: read failed while skipping" - " var data") + logger.warn(err) return None - # prints out a heck of a lot of stuff - # self.hexDump(temp) - else: - # Just seek - try: - self.seekExactly(fh, length) - except EOFException: - LOG.error("JpegSkipVariable: read failed while skipping" - " var data") - return None - - return (rSave is not None and [temp] or [True])[0] - c_charset = {100: 'iso8859_1', 101: 'iso8859_2', 109: 'iso8859_3', - 110: 'iso8859_4', 111: 'iso8859_5', 125: 'iso8859_7', - 127: 'iso8859_6', 138: 'iso8859_8', - 196: 'utf_8'} - c_charset_r = dict([(v, k) for k, v in list(c_charset.items())]) + # If were's here, we must have found the right marker. + # Now blindScan through the data. + return self.blindScan(fh, MAX=jpeg_get_variable_length(fh)) - def blindScan(self, fh, MAX=8192): # OK + def blindScan(self, fh, MAX=8192): """Scans blindly to first IIM Record 2 tag in the file. This method may or may not work on any arbitrary file type, but it doesn't hurt to check. We expect to see this tag within the first 8k of data. 
(This limit may need to be changed or eliminated depending on how other programs choose to store IIM.)""" - ## assert isinstance(fh, file) - assert duck_typed(fh, 'read') offset = 0 # keep within first 8192 bytes # NOTE: this may need to change - LOG.debug('blindScan: starting scan, max length %d', MAX) + logger.debug('blindScan: starting scan, max length %d', MAX) # start digging while offset <= MAX: try: - temp = self.readExactly(fh, 1) + temp = read_exactly(fh, 1) except EOFException: - LOG.warn("BlindScan: hit EOF while scanning") + logger.warn("BlindScan: hit EOF while scanning") return None # look for tag identifier 0x1c - if ord(temp) == 0x1c: + if ord3(temp) == 0x1c: # if we found that, look for record 2, dataset 0 # (record version number) (record, dataset) = fh.read(2) if record == 1 and dataset == 90: # found character set's record! try: - temp = self.readExactly(fh, - self.jpegGetVariableLength(fh)) + temp = read_exactly(fh, jpeg_get_variable_length(fh)) try: cs = unpack('!H', temp)[0] - except: - LOG.warn('WARNING: problems with charset ' - 'recognition (%r)', temp) + except Exception: # TODO better exception + logger.warn('WARNING: problems with charset recognition (%r)', temp) cs = None - if cs in self.c_charset: - self.inp_charset = self.c_charset[cs] - LOG.info("BlindScan: found character set '%s'" - " at offset %d", self.inp_charset, offset) + if cs in c_charset: + self.inp_charset = c_charset[cs] + logger.info("BlindScan: found character set '%s' at offset %d", + self.inp_charset, offset) except EOFException: pass elif record == 2: # found it. seek to start of this tag and return. - LOG.debug("BlindScan: found IIM start at offset %d", - offset) + logger.debug("BlindScan: found IIM start at offset %d", offset) try: # seek rel to current position - self.seekExactly(fh, -3) + seek_exactly(fh, -3) except EOFException: return None return offset + else: # didn't find it. back up 2 to make up for # those reads above. 
try: # seek rel to current position - self.seekExactly(fh, -2) + seek_exactly(fh, -2) except EOFException: return None @@ -999,16 +820,14 @@ def blindScan(self, fh, MAX=8192): # OK return False - def collectIIMInfo(self, fh): # OK + def collectIIMInfo(self, fh): """Assuming file is seeked to start of IIM data (using above), this reads all the data into our object's hashes""" # NOTE: file should already be at the start of the first # IPTC code: record 2, dataset 0. - ## assert isinstance(fh, file) - assert duck_typed(fh, 'read') - while 1: + while True: try: - header = self.readExactly(fh, 5) + header = read_exactly(fh, 5) except EOFException: return None @@ -1017,27 +836,21 @@ def collectIIMInfo(self, fh): # OK if not (tag == 0x1c and record == 2): return None - alist = {'tag': tag, 'record': record, 'dataset': dataset, - 'length': length} - LOG.debug('\n'.join('%s\t: %s' % (k, v) - for k, v in list(alist.items()))) + alist = {'tag': tag, 'record': record, 'dataset': dataset, 'length': length} + logger.debug('\t'.join('%s: %s' % (k, v) for k, v in alist.items())) value = fh.read(length) if self.inp_charset: try: - value = str(value, encoding=self.inp_charset, - errors='strict') - except: - LOG.warn('Data "%r" is not in encoding %s!', - value, self.inp_charset) - value = str(value, encoding=self.inp_charset, - errors='replace') + value = str(value, encoding=self.inp_charset, errors='strict') + except Exception: # TODO better exception + logger.warn('Data "%r" is not in encoding %s!', value, self.inp_charset) + value = str(value, encoding=self.inp_charset, errors='replace') # try to extract first into _listdata (keywords, categories) # and, if unsuccessful, into _data. Tags which are not in the # current IIM spec (version 4) are currently discarded. 
- if (dataset in self._data - and hasattr(self._data[dataset], 'append')): + if dataset in self._data and hasattr(self._data[dataset], 'append'): self._data[dataset].append(value) elif dataset != 0: self._data[dataset] = value @@ -1046,172 +859,6 @@ def collectIIMInfo(self, fh): # OK # File Saving ####################################################################### - def jpegCollectFileParts(self, fh, discardAppParts=False): - """Collects all pieces of the file except for the IPTC info that - we'll replace when saving. Returns the stuff before the info, - stuff after, and the contents of the Adobe Resource Block that the - IPTC data goes in. Returns None if a file parsing error occured.""" - - ## assert isinstance(fh, file) - assert duck_typed(fh, ['seek', 'read']) - adobeParts = '' - start = [] - - # Start at beginning of file - fh.seek(0, 0) - # Skip past start of file marker - (ff, soi) = fh.read(2) - if not (ord(ff) == 0xff and ord(soi) == 0xd8): - self.error = "JpegScan: invalid start of file" - LOG.error(self.error) - return None - - # Begin building start of file - start.append(pack("BB", 0xff, 0xd8)) - - # Get first marker in file. This will be APP0 for JFIF or APP1 for - # EXIF. - marker = self.jpegNextMarker(fh) - app0data = '' - app0data = self.jpegSkipVariable(fh, app0data) - if app0data is None: - self.error = 'jpegSkipVariable failed' - LOG.error(self.error) - return None - - if ord(marker) == 0xe0 or not discardAppParts: - # Always include APP0 marker at start if it's present. - start.append(pack('BB', 0xff, ord(marker))) - # Remember that the length must include itself (2 bytes) - start.append(pack('!H', len(app0data) + 2)) - start.append(app0data) - else: - # Manually insert APP0 if we're trashing application parts, since - # all JFIF format images should start with the version block. 
- LOGDBG.debug('discardAppParts=%r', discardAppParts) - start.append(pack("BB", 0xff, 0xe0)) - start.append(pack("!H", 16)) # length (including these 2 bytes) - start.append("JFIF") # format - start.append(pack("BB", 1, 2)) # call it version 1.2 (current - # JFIF) - start.append(pack('8B', 0)) # zero everything else - - # Now scan through all markers in file until we hit image data or - # IPTC stuff. - end = [] - while 1: - marker = self.jpegNextMarker(fh) - if marker is None or ord(marker) == 0: - self.error = "Marker scan failed" - LOG.error(self.error) - return None - # Check for end of image - elif ord(marker) == 0xd9: - LOG.debug("JpegCollectFileParts: saw end of image marker") - end.append(pack("BB", 0xff, ord(marker))) - break - # Check for start of compressed data - elif ord(marker) == 0xda: - LOG.debug("JpegCollectFileParts: saw start of compressed data") - end.append(pack("BB", 0xff, ord(marker))) - break - partdata = '' - partdata = self.jpegSkipVariable(fh, partdata) - if not partdata: - self.error = "JpegSkipVariable failed" - LOG.error(self.error) - return None - partdata = str(partdata) - - # Take all parts aside from APP13, which we'll replace - # ourselves. - if (discardAppParts and ord(marker) >= 0xe0 - and ord(marker) <= 0xef): - # Skip all application markers, including Adobe parts - adobeParts = '' - elif ord(marker) == 0xed: - # Collect the adobe stuff from part 13 - adobeParts = self.collectAdobeParts(partdata) - break - else: - # Append all other parts to start section - start.append(pack("BB", 0xff, ord(marker))) - start.append(pack("!H", len(partdata) + 2)) - start.append(partdata) - - # Append rest of file to end - while 1: - buff = fh.read(8192) - if buff is None or len(buff) == 0: - break - end.append(buff) - - return (''.join(start), ''.join(end), adobeParts) - - def collectAdobeParts(self, data): - """Part APP13 contains yet another markup format, one defined by - Adobe. 
See"File Formats Specification" in the Photoshop SDK - (avail from www.adobe.com). We must take - everything but the IPTC data so that way we can write the file back - without losing everything else Photoshop stuffed into the APP13 - block.""" - assert isinstance(data, str) - length = len(data) - offset = 0 - out = [] - # Skip preamble - offset = len('Photoshop 3.0 ') - # Process everything - while offset < length: - # Get OSType and ID - (ostype, id1, id2) = unpack("!LBB", data[offset:offset + 6]) - offset += 6 - if offset >= length: - break - - # Get pascal string - stringlen = unpack("B", data[offset:offset + 1])[0] - offset += 1 - if offset >= length: - break - string = data[offset:offset + stringlen] - offset += stringlen - - # round up if odd - if (stringlen % 2 != 0): - offset += 1 - # there should be a null if string len is 0 - if stringlen == 0: - offset += 1 - if offset >= length: - break - - # Get variable-size data - size = unpack("!L", data[offset:offset + 4])[0] - offset += 4 - if offset >= length: - break - - var = data[offset:offset + size] - offset += size - if size % 2 != 0: - offset += 1 # round up if odd - - # skip IIM data (0x0404), but write everything else out - if not (id1 == 4 and id2 == 4): - out.append(pack("!LBB", ostype, id1, id2)) - out.append(pack("B", stringlen)) - out.append(string) - if stringlen == 0 or stringlen % 2 != 0: - out.append(pack("B", 0)) - out.append(pack("!L", size)) - out.append(var) - out = [''.join(out)] - if size % 2 != 0 and len(out[0]) % 2 != 0: - out.append(pack("B", 0)) - - return ''.join(out) - def _enc(self, text): """Recodes the given text from the old character set to utf-8""" res = text @@ -1223,10 +870,8 @@ def _enc(self, text): res = str(text, encoding=self.inp_charset).encode( out_charset) except (UnicodeEncodeError, UnicodeDecodeError): - LOG.error("_enc: charset %s is not working for %s", - self.inp_charset, text) - res = str(text, encoding=self.inp_charset, - errors='replace').encode(out_charset) + 
logger.error("_enc: charset %s is not working for %s", self.inp_charset, text) + res = str(text, encoding=self.inp_charset, errors='replace').encode(out_charset) elif isinstance(text, (list, tuple)): res = type(text)(list(map(self._enc, text))) return res @@ -1240,40 +885,42 @@ def packedIIMData(self): # tag - record - dataset - len (short) - 4 (short) out.append(pack("!BBBHH", tag, record, 0, 2, 4)) - LOGDBG.debug('out=%r', self.hexDump(out)) + LOGDBG.debug('out=%s', hex_dump(out)) # Iterate over data sets - for dataset, value in list(self._data.items()): + for dataset, value in self._data.items(): if len(value) == 0: continue + if not (isinstance(dataset, int) and dataset in c_datasets): - LOG.warn("PackedIIMData: illegal dataname '%s' (%d)", - dataset, dataset) + logger.warn("packedIIMData: illegal dataname '%s' (%d)", dataset, dataset) continue - LOG.debug('packedIIMData %r -> %r', value, self._enc(value)) + + logger.debug('packedIIMData %02X: %r -> %r', dataset, value, self._enc(value)) value = self._enc(value) if not isinstance(value, list): - value = str(value) + value = bytes(value) out.append(pack("!BBBH", tag, record, dataset, len(value))) out.append(value) else: - for v in map(str, value): + for v in map(bytes, value): if v is None or len(v) == 0: continue + out.append(pack("!BBBH", tag, record, dataset, len(v))) out.append(v) - return ''.join(out) + return b''.join(out) def photoshopIIMBlock(self, otherparts, data): """Assembles the blob of Photoshop "resource data" that includes our fresh IIM data (from PackedIIMData) and the other Adobe parts we found in the file, if there were any.""" out = [] - assert isinstance(data, str) - resourceBlock = ["Photoshop 3.0"] + assert isinstance(data, bytes) + resourceBlock = [b"Photoshop 3.0"] resourceBlock.append(pack("B", 0)) # Photoshop identifier - resourceBlock.append("8BIM") + resourceBlock.append(b"8BIM") # 0x0404 is IIM data, 00 is required empty string resourceBlock.append(pack("BBBB", 0x04, 0x04, 0, 0)) # 
length of data as 32-bit, network-byte order @@ -1286,67 +933,16 @@ def photoshopIIMBlock(self, otherparts, data): # Finally tack on other data if otherparts is not None: resourceBlock.append(otherparts) - resourceBlock = ''.join(resourceBlock) + resourceBlock = b''.join(resourceBlock) out.append(pack("BB", 0xff, 0xed)) # Jpeg start of block, APP13 out.append(pack("!H", len(resourceBlock) + 2)) # length out.append(resourceBlock) - return ''.join(out) - - ####################################################################### - # Helpers, docs - ####################################################################### - - @staticmethod - def hexDump(dump): - """Very helpful when debugging.""" - length = len(dump) - P = lambda z: ((ord(z) >= 0x21 and ord(z) <= 0x7e) and [z] or ['.'])[0] - ROWLEN = 18 - res = ['\n'] - for j in range(length // ROWLEN + int(length % ROWLEN > 0)): - row = dump[j * ROWLEN:(j + 1) * ROWLEN] - if isinstance(row, list): - row = ''.join(row) - res.append( - ('%02X ' * len(row) + ' ' * (ROWLEN - len(row)) + '| %s\n') % \ - tuple(list(map(ord, list(row))) + [''.join(map(P, row))])) - return ''.join(res) - - def jpegDebugScan(self, filename): - """Also very helpful when debugging.""" - assert isinstance(filename, str) and os.path.isfile(filename) - fh = file(filename, 'wb') - if not fh: - raise Exception("Can't open %s" % filename) - - # Skip past start of file marker - (ff, soi) = fh.read(2) - if not (ord(ff) == 0xff and ord(soi) == 0xd8): - LOG.error("JpegScan: invalid start of file") - else: - # scan to 0xDA (start of scan), dumping the markers we see between - # here and there. 
- while 1: - marker = self.jpegNextMarker(fh) - if ord(marker) == 0xda: - break - - if ord(marker) == 0: - LOG.warn("Marker scan failed") - break - elif ord(marker) == 0xd9: - LOG.debug("Marker scan hit end of image marker") - break - - if not self.jpegSkipVariable(fh): - LOG.warn("JpegSkipVariable failed") - return None + return b''.join(out) - self._closefh(fh) -if __name__ == '__main__': +if __name__ == '__main__': # pragma: no cover logging.basicConfig(level=logging.DEBUG) if len(sys.argv) > 1: info = IPTCInfo(sys.argv[1]) diff --git a/iptcinfo_test.py b/iptcinfo_test.py old mode 100755 new mode 100644 index 6e8732d..c724f87 --- a/iptcinfo_test.py +++ b/iptcinfo_test.py @@ -1,44 +1,117 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# :mode=python:encoding=UTF-8: -from iptcinfo3 import IPTCInfo -import logging -logging.basicConfig(level=logging.DEBUG) -import sys +import random +import os -fn = (len(sys.argv) > 1 and [sys.argv[1]] or ['test.jpg'])[0] -fn2 = (len(sys.argv) > 2 and [sys.argv[2]] or ['test_out.jpg'])[0] +import pytest -# Create new info object -info = IPTCInfo(fn, force=True) +from iptcinfo3 import ( + EOFException, + IPTCData, + IPTCInfo, + file_is_jpeg, + hex_dump, + jpeg_collect_file_parts, +) -# Check if file had IPTC data -# if len(info.data) < 4: raise Exception(info.error) -# Get list of keywords, supplemental categories, or contacts -keywords = info.keywords -suppCats = info.supplementalCategories -contacts = info.contacts +def test_EOFException_message(): + exp = EOFException() + assert str(exp) == '' -# Get specific attributes... -caption = info.data['caption/abstract'] + exp = EOFException('ugh', 'well') + assert str(exp) == 'ugh\nwell' -# Create object for file that may or may not have IPTC data. 
-info = IPTCInfo(fn, force=True) -# Add/change an attribute -info.data['caption/abstract'] = 'árvíztűrő tükörfúrógép' -info.data['supplemental category'] = ['portrait'] -info.data[123] = '123' -info.data['nonstandard_123'] = 'n123' +def test_hex_dump(): + out = hex_dump(b'ABCDEF') + assert out.strip() == '41 42 43 44 45 46 | ABCDEF' -print((info.data)) -# Save new info to file -##### See disclaimer in 'SAVING FILES' section ##### -info.save() -info.saveAs(fn2) +def test_jpeg_collect_parts_works_with_many_jpegs(): + with open('fixtures/Lenna.jpg', 'rb') as fh: + start, end, adobe = jpeg_collect_file_parts(fh) -#re-read IPTC info -print((IPTCInfo(fn2))) + assert len(start) == 356 + assert len(end) == 42891 + assert len(adobe) == 0 + with open('fixtures/instagram.jpg', 'rb') as fh: + start, end, adobe = jpeg_collect_file_parts(fh) + + assert len(start) == 20 + assert len(end) == 73394 + assert len(adobe) == 0 + + +def test_IPTCData(): + data = IPTCData({105: 'Audiobook Narrator Really Going For Broke With Cajun Accent'}) + assert data['headline'].startswith('Audiobook') + assert data[105].startswith('Audiobook') + assert data['Headline'].startswith('Audiobook') + + data['keywords'] = ['foo'] + data['keywords'] = ['foo', 'bar'] + with pytest.raises(ValueError): + data['keywords'] = 'foo' + + with pytest.raises(KeyError): + IPTCData({'yobby': 'yoshi'}) + + with pytest.raises(KeyError): + data['yobby'] = 'yoshi' + + data = IPTCData({'nonstandard_69': 'sanic'}) + assert data[69] == 'sanic' + + assert str(data) == "{'nonstandard_69': 'sanic'}" + + +def test_file_is_jpeg_detects_invalid_file(): + with open('fixtures/Lenna.jpg', 'rb') as fh: + assert file_is_jpeg(fh) + + with open('setup.cfg', 'rb') as fh: + assert not file_is_jpeg(fh) + + +def test_getitem_can_read_info(): + info = IPTCInfo('fixtures/Lenna.jpg') + + assert len(info) >= 4 + assert info['keywords'] == [b'lenna', b'test'] + assert info['supplemental category'] == [b'supplemental category'] + assert 
info['caption/abstract'] == b'I am a caption' + + +def test_save_as_saves_as_new_file_with_info(): + if os.path.isfile('fixtures/deleteme.jpg'): # pragma: no cover + os.unlink('fixtures/deleteme.jpg') + + info = IPTCInfo('fixtures/Lenna.jpg') + info.save_as('fixtures/deleteme.jpg') + + info2 = IPTCInfo('fixtures/deleteme.jpg') + + # The files won't be byte for byte exact, so filecmp won't work + assert info._data == info2._data + with open('fixtures/Lenna.jpg', 'rb') as fh, open('fixtures/deleteme.jpg', 'rb') as fh2: + start, end, adobe = jpeg_collect_file_parts(fh) + start2, end2, adobe2 = jpeg_collect_file_parts(fh2) + + # But we can compare each section + assert start == start2 + assert end == end2 + assert adobe == adobe2 + + +def test_save_as_saves_as_new_file_with_new_info(): + if os.path.isfile('fixtures/deleteme.jpg'): # pragma: no cover + os.unlink('fixtures/deleteme.jpg') + + new_headline = b'test headline %d' % random.randint(0, 100) + info = IPTCInfo('fixtures/Lenna.jpg') + info['headline'] = new_headline + info.save_as('fixtures/deleteme.jpg') + + info2 = IPTCInfo('fixtures/deleteme.jpg') + + assert info2['headline'] == new_headline diff --git a/list.py b/list.py deleted file mode 100755 index 784f0fb..0000000 --- a/list.py +++ /dev/null @@ -1,11 +0,0 @@ -#!/usr/bin/env python -import iptcinfo, sys - -if len(sys.argv) != 2: - print("""usage = list file.jpg""") - sys.exit() -fn = sys.argv[1] - -info = iptcinfo.IPTCInfo(fn, force=True) -print(info) - diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..60cd6af --- /dev/null +++ b/setup.cfg @@ -0,0 +1,10 @@ +[flake8] +max-line-length = 100 + +[coverage:run] +branch = True +source = . + +[coverage:report] +show_missing = True +skip_covered = True diff --git a/test.pl b/test.pl deleted file mode 100755 index 97fbf54..0000000 --- a/test.pl +++ /dev/null @@ -1,16 +0,0 @@ -#!/usr/bin/env perl - -use IPTCInfo; - -my $fn = ($#ARGV > -1 ? 
$ARGV[0] : 'test.jpg'); -my $fn2 = substr($fn, 0, rindex($fn, '.')) . '_o.jpg'; -print "fn2=$fn2\n"; - -($info = new Image::IPTCInfo($fn, 'force')) or die("Couldn't...\n"); -print "info: $info\n"; -$info->SetAttribute('urgency', 'GT'); -$info->AddKeyword('ize'); -$info->SaveAs($fn2); -$info = new Image::IPTCInfo($fn2, 1); -#print $info->ExportXML('iptc'); - diff --git "a/test/emil_stenstr\303\266m/iptcinfo-test.py" "b/test/emil_stenstr\303\266m/iptcinfo-test.py" deleted file mode 100644 index 10defd7..0000000 --- "a/test/emil_stenstr\303\266m/iptcinfo-test.py" +++ /dev/null @@ -1,16 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -try: - from iptcinfo import IPTCInfo -except ImportError: - import sys, os - sys.path.insert(0, os.path.join(os.pardir, os.pardir)) - from iptcinfo import IPTCInfo - -if __name__ == '__main__': - iptc = IPTCInfo(sys.argv[1], force=True) - caption = iptc.data["caption/abstract"] or u'árvíztűrő Dag 1 tükörfúrógép' - newcaption = caption.replace("Dag 1", "Dag 2") - iptc.data["caption/abstract"] = newcaption - iptc.saveAs(sys.argv[1].rsplit('.', 1)[0] + '-t.jpg') diff --git a/test/iptcinfo-test.py b/test/iptcinfo-test.py deleted file mode 100755 index 6a5b66e..0000000 --- a/test/iptcinfo-test.py +++ /dev/null @@ -1,19 +0,0 @@ -#!/usr/bin/env python -# :mode=python:encoding=utf-8 -# -*- coding: utf-8 -*- - -import sys -sys.path.insert(0, '.') -from iptcinfo import IPTCInfo, LOG, LOGDBG - -if __name__ == '__main__': - import logging - logging.basicConfig(level=logging.DEBUG) - LOGDBG.setLevel(logging.DEBUG) - if len(sys.argv) > 1: - info = IPTCInfo(sys.argv[1],True) - info.keywords = ['test'] - info.supplementalCategories = [] - info.contacts = [] - print("info = %s\n%s" % (info,"="*30), file=sys.stderr) - info.save() diff --git a/test/matej_cepl/iptcinfo-test.py b/test/matej_cepl/iptcinfo-test.py deleted file mode 100644 index d58f10a..0000000 --- a/test/matej_cepl/iptcinfo-test.py +++ /dev/null @@ -1,1226 +0,0 @@ -#!/usr/bin/env 
python -# :mode=python:encoding=utf-8 -# -*- coding: utf-8 -*- -# Author: 2004 Gulcsi Tams -# -# Ported from Josh Carter's Perl IPTCInfo.pm by Tam?s Gul?csi -# -# IPTCInfo: extractor for IPTC metadata embedded in images -# Copyright (C) 2000-2004 Josh Carter -# All rights reserved. -# -# This program is free software; you can redistribute it and/or modify -# it under the same terms as Python itself. -# -# VERSION = '1.9'; -""" -IPTCInfo - Python module for extracting and modifying IPTC image meta-data - -Ported from Josh Carter's Perl IPTCInfo-1.9.pm by Tams Gulcsi - -Ever wish you add information to your photos like a caption, the place -you took it, the date, and perhaps even keywords and categories? You -already can. The International Press Telecommunications Council (IPTC) -defines a format for exchanging meta-information in news content, and -that includes photographs. You can embed all kinds of information in -your images. The trick is putting it to use. - -That's where this IPTCInfo Python module comes into play. You can embed -information using many programs, including Adobe Photoshop, and -IPTCInfo will let your web server -- and other automated server -programs -- pull it back out. You can use the information directly in -Python programs, export it to XML, or even export SQL statements ready -to be fed into a database. - - -PREFACE - -First, I want to apologize a little bit: as this module is originally -written in Perl by Josh Carter, it is quite non-Pythonic (for example -the addKeyword, clearSupplementalCategories functions - I think it -would be better having a derived list class with add, clear functions) -and tested only by me reading/writing IPTC metadata for family photos. -Any suggestions welcomed! 
- -Thanks, -Tams Gulcsi - -SYNOPSIS - - from iptcinfo import IPTCInfo - import sys - - fn = (len(sys.argv) > 1 and [sys.argv[1]] or ['test.jpg'])[0] - fn2 = (len(sys.argv) > 2 and [sys.argv[2]] or ['test_out.jpg'])[0] - - # Create new info object - info = IPTCInfo(fn) - - # Check if file had IPTC data - if len(info.data) < 4: raise Exception(info.error) - - # Print list of keywords, supplemental categories, or contacts - print info.keywords - print info.supplementalCategories - print info.contacts - - # Get specific attributes... - caption = info.data['caption/abstract'] - - # Create object for file that may or may not have IPTC data. - info = IPTCInfo(fn) - - # Add/change an attribute - info.data['caption/abstract'] = 'Witty caption here' - info.data['supplemental category'] = ['portrait'] - - # Save new info to file - ##### See disclaimer in 'SAVING FILES' section ##### - info.save() - info.saveAs(fn2) - - #re-read IPTC info - print IPTCInfo(fn2) - -DESCRIPTION - - USING IPTCINFO - - To integrate with your own code, simply do something like what's in - the synopsys above. - - The complete list of possible attributes is given below. These are - as specified in the IPTC IIM standard, version 4. Keywords and - categories are handled slightly differently: since these are lists, - the module allows you to access them as Python lists. Call - keywords() and supplementalCategories() to get each list. - - IMAGES NOT HAVING IPTC METADATA - - If yout apply - - info = IPTCInfo('file-name-here.jpg') - - to an image not having IPTC metadata, len(info.data) will be 3 - ('supplemental categories', 'keywords', 'contacts') will be empty - lists. - - MODIFYING IPTC DATA - - You can modify IPTC data in JPEG files and save the file back to - disk. Here are the commands for doing so: - - # Set a given attribute - info.data['iptc attribute here'] = 'new value here' - - # Clear the keywords or supp. 
categories list - info.keywords = [] - info.supplementalCategories = [] - info.contacts = [] - - # Add keywords or supp. categories - info.keyword.append('frob') - - # You can also add a list reference - info.keyword.extend(['frob', 'nob', 'widget']) - info.keyword += ['gadget'] - - SAVING FILES - - With JPEG files you can add/change attributes, add keywords, etc., and - then call: - - info.save() - info.saveAs('new-file-name.jpg') - - This will save the file with the updated IPTC info. Please only run - this on *copies* of your images -- not your precious originals! -- - because I'm not liable for any corruption of your images. (If you - read software license agreements, nobody else is liable, - either. Make backups of your originals!) - - If you're into image wizardry, there are a couple handy options you - can use on saving. One feature is to trash the Adobe block of data, - which contains IPTC info, color settings, Photoshop print settings, - and stuff like that. The other is to trash all application blocks, - including stuff like EXIF and FlashPix data. This can be handy for - reducing file sizes. The options are passed as a dict to save() - and saveAs(), e.g.: - - info.save({'discardAdobeParts': 'on'}) - info.saveAs('new-file-name.jpg', {'discardAppParts': 'on'}) - - Note that if there was IPTC info in the image, or you added some - yourself, the new image will have an Adobe part with only the IPTC - information. - - XML AND SQL EXPORT FEATURES - - IPTCInfo also allows you to easily generate XML and SQL from the image - metadata. For XML, call: - - xml = info.exportXML('entity-name', extra-data, - 'optional output file name') - - This returns XML containing all image metadata. Attribute names are - translated into XML tags, making adjustments to spaces and slashes - for compatibility. (Spaces become underbars, slashes become dashes.) - You provide an entity name; all data will be contained within this - entity. 
You can optionally provides a reference to a hash of extra - data. This will get put into the XML, too. (Example: you may want to - put info on the image's location into the XML.) Keys must be valid - XML tag names. You can also provide a filename, and the XML will be - dumped into there. - - For SQL, it goes like this: - - my mappings = { - 'IPTC dataset name here': 'your table column name here', - 'caption/abstract': 'caption', - 'city': 'city', - 'province/state': 'state} # etc etc etc. - - statement = info.exportSQL('mytable', mappings, extra-data) - - This returns a SQL statement to insert into your given table name a - set of values from the image. You pass in a reference to a hash - which maps IPTC dataset names into column names for the database - table. As with XML export, you can also provide extra information to - be stuck into the SQL. - -IPTC ATTRIBUTE REFERENCE - - object name originating program - edit status program version - editorial update object cycle - urgency by-line - subject reference by-line title - category city - fixture identifier sub-location - content location code province/state - content location name country/primary location code - release date country/primary location name - release time original transmission reference - expiration date headline - expiration time credit - special instructions source - action advised copyright notice - reference service contact - reference date caption/abstract - reference number writer/editor - date created image type - time created image orientation - digital creation date language identifier - digital creation time - - custom1 - custom20: NOT STANDARD but used by Fotostation. - IPTCInfo also supports these fields. - -KNOWN BUGS - -IPTC meta-info on MacOS may be stored in the resource fork instead -of the data fork. This program will currently not scan the resource -fork. - -I have heard that some programs will embed IPTC info at the end of the -file instead of the beginning. 
The module will currently only look -near the front of the file. If you have a file with IPTC data that -IPTCInfo can't find, please contact me! I would like to ensure -IPTCInfo works with everyone's files. - -AUTHOR - -Josh Carter, josh@multipart-mixed.com -""" - -__version__ = '1.9.2-rc5' -__author__ = 'Gulcsi, Tams' - -SURELY_WRITE_CHARSET_INFO = False - -from struct import pack, unpack -from io import StringIO -import sys, re, codecs, os - -class String(str): - def __iadd__(self, other): - assert isinstance(other, str) - super(type(self), self).__iadd__(other) - -class EOFException(Exception): - def __init__(self, *args): - Exception.__init__(self) - self._str = '\n'.join(args) - - def __str__(self): - return self._str - -def push(diction, key, value): - if key in diction and isinstance(diction[key], list): - diction[key].append(value) - else: diction[key] = value - -def duck_typed(obj, prefs): - if isinstance(prefs, str): prefs = [prefs] - for pref in prefs: - if not hasattr(obj, pref): return False - return True - -#~ sys_enc = sys.getfilesystemencoding() -sys_enc = "utf_8" - -# Debug off for production use -debugMode = 0 - -##################################### -# These names match the codes defined in ITPC's IIM record 2. -# This hash is for non-repeating data items; repeating ones -# are in %listdatasets below. 
-c_datasets = { - # 0: 'record version', # skip -- binary data - 5: 'object name', - 7: 'edit status', - 8: 'editorial update', - 10: 'urgency', - 12: 'subject reference', - 15: 'category', - 20: 'supplemental category', - 22: 'fixture identifier', - 25: 'keywords', - 26: 'content location code', - 27: 'content location name', - 30: 'release date', - 35: 'release time', - 37: 'expiration date', - 38: 'expiration time', - 40: 'special instructions', - 42: 'action advised', - 45: 'reference service', - 47: 'reference date', - 50: 'reference number', - 55: 'date created', - 60: 'time created', - 62: 'digital creation date', - 63: 'digital creation time', - 65: 'originating program', - 70: 'program version', - 75: 'object cycle', - 80: 'by-line', - 85: 'by-line title', - 90: 'city', - 92: 'sub-location', - 95: 'province/state', - 100: 'country/primary location code', - 101: 'country/primary location name', - 103: 'original transmission reference', - 105: 'headline', - 110: 'credit', - 115: 'source', - 116: 'copyright notice', - 118: 'contact', - 120: 'caption/abstract', - 122: 'writer/editor', -# 125: 'rasterized caption', # unsupported (binary data) - 130: 'image type', - 131: 'image orientation', - 135: 'language identifier', - 200: 'custom1', # These are NOT STANDARD, but are used by - 201: 'custom2', # Fotostation. Use at your own risk. They're - 202: 'custom3', # here in case you need to store some special - 203: 'custom4', # stuff, but note that other programs won't - 204: 'custom5', # recognize them and may blow them away if - 205: 'custom6', # you open and re-save the file. (Except with - 206: 'custom7', # Fotostation, of course.) 
- 207: 'custom8', - 208: 'custom9', - 209: 'custom10', - 210: 'custom11', - 211: 'custom12', - 212: 'custom13', - 213: 'custom14', - 214: 'custom15', - 215: 'custom16', - 216: 'custom17', - 217: 'custom18', - 218: 'custom19', - 219: 'custom20', -} - -c_datasets_r = dict([(v, k) for k, v in c_datasets.items()]) - -class IPTCData(dict): - """Dict with int/string keys from c_listdatanames""" - def __init__(self, diction={}, *args, **kwds): - super(type(self), self).__init__(self, *args, **kwds) - self.update(dict([(self.keyAsInt(k), v) - for k, v in diction.items()])) - - c_cust_pre = 'nonstandard_' - def keyAsInt(self, key): - global c_datasets_r - if isinstance(key, int): return key #and c_datasets.has_key(key): return key - elif key in c_datasets_r: return c_datasets_r[key] - elif (key.startswith(self.c_cust_pre) - and key[len(self.c_cust_pre):].isdigit()): - return int(key[len(self.c_cust_pre):]) - else: raise KeyError("Key %s is not in %s!" % (key, list(c_datasets_r.keys()))) - - def keyAsStr(self, key): - global c_datasets - if isinstance(key, str) and key in c_datasets_r: return key - elif key in c_datasets: return c_datasets[key] - elif isinstance(key, int): return self.c_cust_pre + str(key) - else: raise KeyError("Key %s is not in %s!" % (key, list(c_datasets.keys()))) - - def __getitem__(self, name): - return super(type(self), self).get(self.keyAsInt(name), None) - - def __setitem__(self, name, value): - key = self.keyAsInt(name) - o = super(type(self), self) - if key in o and isinstance(o.__getitem__(key), list): - #print key, c_datasets[key], o.__getitem__(key) - if isinstance(value, list): o.__setitem__(key, value) - else: raise ValueError("For %s only lists acceptable!" 
% name) - else: o.__setitem__(self.keyAsInt(name), value) - -def debug(level, *args): - if level < debugMode: - print('\n'.join(map(str, args))) - -def _getSetSomeList(name): - def getList(self): - """Returns the list of %s.""" % name - return self._data[name] - - def setList(self, value): - """Sets the list of %s.""" % name - if isinstance(value, (list, tuple)): self._data[name] = list(value) - elif isinstance(value, str): - self._data[name] = [value] - print('Warning: IPTCInfo.%s is a list!' % name) - else: raise ValueError('IPTCInfo.%s is a list!' % name) - - return (getList, setList) - - -class IPTCInfo(object): - """info = IPTCInfo('image filename goes here') - - File can be a file-like object or a string. If it is a string, it is - assumed to be a filename. - - Returns IPTCInfo object filled with metadata from the given image - file. File on disk will be closed, and changes made to the IPTCInfo - object will *not* be flushed back to disk. - - If force==True, than forces an object to always be returned. This - allows you to start adding stuff to files that don't have IPTC info - and then save it.""" - - def __init__(self, fobj, force=False, *args, **kwds): - # Open file and snarf data from it. 
- self.error = None - self._data = IPTCData({'supplemental category': [], 'keywords': [], - 'contact': []}) - if duck_typed(fobj, 'read'): - self._filename = None - self._fh = fobj - else: - self._filename = fobj - - fh = self._getfh() - self.inp_charset = sys_enc - self.out_charset = 'utf_8' - - datafound = self.scanToFirstIMMTag(fh) - if datafound or force: - # Do the real snarfing here - if datafound: self.collectIIMInfo(fh) - else: - self.log("No IPTC data found.") - self._closefh(fh) - raise Exception("No IPTC data found.") - self._closefh(fh) - - def _closefh(self, fh): - if fh and self._filename is not None: fh.close() - - def _getfh(self, mode='r'): - assert self._filename is not None or self._fh is not None - if self._filename is not None: - fh = file(self._filename, (mode + 'b').replace('bb', 'b')) - if not fh: - self.log("Can't open file") - return None - else: return fh - else: return self._fh - - ####################################################################### - # New, Save, Destroy, Error - ####################################################################### - - def error(self): - """Returns the last error message""" - return self.error - - def save(self, options=None): - """Saves Jpeg with IPTC data back to the same file it came from.""" - assert self._filename is not None - print("iptcinfo.IPTCInfo.save: self.keywords = %s" % self.keywords, file=sys.stderr) - return self.saveAs(self._filename, options) - - def _filepos(self, fh): - fh.flush() - return 'POS=%d\n' % fh.tell() - - def saveAs(self, newfile, options=None): - """Saves Jpeg with IPTC data to a given file name.""" - assert self._filename is not None - print("saveAs: self = %s" % self, file=sys.stderr) - # Open file and snarf data from it. - fh = self._getfh() - if not self.fileIsJpeg(fh): - self.log("Source file is not a Jpeg; I can only save Jpegs. 
Sorry.") - return None - ret = self.jpegCollectFileParts(fh, options) - self._closefh(fh) - if ret is None: - self.log("collectfileparts failed") - print(self.error, file=sys.stderr) - raise Exception('collectfileparts failed') - - (start, end, adobe) = ret - debug(2, 'start: %d, end: %d, adobe:%d' % tuple(map(len, ret))) - self.hexDump(start), len(end) - debug(3, 'adobe1', adobe) - if options is not None and 'discardAdobeParts' in options: - adobe = None - debug(3, 'adobe2', adobe) - - debug(1, 'writing...') - # fh = os.tmpfile() ## 20051011 - Windows doesn't like tmpfile ## - # Open dest file and stuff data there - # fh.truncate() - # fh.seek(0, 0) - # debug(2, self._filepos(fh)) - fh = StringIO() - if not fh: - self.log("Can't open output file") - return None - debug(3, len(start), len(end)) - fh.write(start) - # character set - ch = self.c_charset_r.get((self.out_charset is None and [self.inp_charset] - or [self.out_charset])[0], None) - # writing the character set is not the best practice - couldn't find the needed place (record) for it yet! - if SURELY_WRITE_CHARSET_INFO and ch is not None: - fh.write(pack("!BBBHH", 0x1c, 1, 90, 4, ch)) - - - debug(2, self._filepos(fh)) - #$self->PhotoshopIIMBlock($adobe, $self->PackedIIMData()); - data = self.photoshopIIMBlock(adobe, self.packedIIMData()) - debug(3, len(data), self.hexDump(data)) - fh.write(data) - debug(2, self._filepos(fh)) - fh.flush() - fh.write(end) - debug(2, self._filepos(fh)) - fh.flush() - - #copy the successfully written file back to the given file - fh2 = file(newfile, 'wb') - fh2.truncate() - fh2.seek(0,0) - fh.seek(0, 0) - while 1: - buf = fh.read(8192) - if buf is None or len(buf) == 0: break - fh2.write(buf) - self._closefh(fh) - fh2.flush() - fh2.close() - return True - - def __destroy__(self): - """Called when object is destroyed. 
No action necessary in this case.""" - pass - - - ####################################################################### - # Attributes for clients - ####################################################################### - - def getData(self): - return self._data - def setData(self, value): - raise Exception('You cannot overwrite the data, only its elements!') - data = property(getData, setData) - - keywords = property(*_getSetSomeList('keywords')) - supplementalCategories = property(*_getSetSomeList('supplemental category')) - contacts = property(*_getSetSomeList('contact')) - - def __str__(self): - return ('charset: ' + self.inp_charset + '\n' - + str(dict([(self._data.keyAsStr(k), v) - for k, v in self._data.items()]))) - - - def readExactly(self, fh, length): - """readExactly - - Reads exactly length bytes and throws an exception if EOF is hitten before. - """ - ## assert isinstance(fh, file) - assert duck_typed(fh, 'read') # duck typing - buf = fh.read(length) - if buf is None or len(buf) < length: raise EOFException('readExactly: %s' % str(fh)) - return buf - - def seekExactly(self, fh, length): - """seekExactly - - Seeks length bytes from the current position and checks the result - """ - ## assert isinstance(fh, file) - assert duck_typed(fh, ['seek', 'tell']) # duck typing - pos = fh.tell() - fh.seek(length, 1) - if fh.tell() - pos != length: raise EOFException() - - - ####################################################################### - # XML, SQL export - ####################################################################### - - def exportXML(self, basetag, extra, filename): - """xml = info.exportXML('entity-name', extra-data, - 'optional output file name') - - Exports XML containing all image metadata. Attribute names are - translated into XML tags, making adjustments to spaces and slashes - for compatibility. (Spaces become underbars, slashes become - dashes.) Caller provides an entity name; all data will be - contained within this entity. 
Caller optionally provides a - reference to a hash of extra data. This will be output into the - XML, too. Keys must be valid XML tag names. Optionally provide a - filename, and the XML will be dumped into there.""" - - P = lambda s: ' '*off + s + '\n' - off = 0 - - if len(basetag) == 0: basetag = 'photo' - out = P("<%s>" % basetag) - - off += 1 - # dump extra info first, if any - for k, v in (isinstance(extra, dict) and [extra] or [{}])[0].items(): - out += P("<%s>%s" % (k, v, k)) - - # dump our stuff - for k, v in self._data.items(): - if not isinstance(v, list): - key = re.sub('/', '-', re.sub(' +', ' ', self._data.keyAsStr(k))) - out += P("<%s>%s" % (key, v, key)) - - # print keywords - kw = self.keywords() - if kw and len(kw) > 0: - out += P("") - off += 1 - for k in kw: out += P("%s" % k) - off -= 1 - out += P("") - - # print supplemental categories - sc = self.supplementalCategories() - if sc and len(sc) > 0: - out += P("") - off += 1 - for k in sc: - out += P("%s" % k) - off -= 1 - out += P("") - - # print contacts - kw = self.contacts() - if kw and len(kw) > 0: - out += P("") - off += 1 - for k in kw: out += P("%s" % k) - off -= 1 - out += P("") - - # close base tag - off -= 1 - out += P("" % basetag) - - # export to file if caller asked for it. - if len(filename) > 0: - xmlout = file(filename, 'wb') - xmlout.write(out) - xmlout.close() - - return out - - def exportSQL(self, tablename, mappings, extra): - """statement = info.exportSQL('mytable', mappings, extra-data) - - mappings = { - 'IPTC dataset name here': 'your table column name here', - 'caption/abstract': 'caption', - 'city': 'city', - 'province/state': 'state} # etc etc etc. - - Returns a SQL statement to insert into your given table name a set - of values from the image. Caller passes in a reference to a hash - which maps IPTC dataset names into column names for the database - table. Optionally pass in a ref to a hash of extra data which will - also be included in the insert statement. 
Keys in that hash must - be valid column names.""" - - if (tablename is None or mappings is None): return None - statement = columns = values = None - - E = lambda s: "'%s'" % re.sub("'", "''", s) # escape single quotes - - # start with extra data, if any - columns = ', '.join(list(extra.keys()) + list(mappings.keys())) - values = ', '.join(map(E, list(extra.values()) - + [self.getdata(k) for k in list(mappings.keys())])) - # process our data - - statement = "INSERT INTO %s (%s) VALUES (%s)" \ - % (tablename, columns, values) - - return statement - - ####################################################################### - # File parsing functions (private) - ####################################################################### - - def scanToFirstIMMTag(self, fh): #OK# - """Scans to first IIM Record 2 tag in the file. The will either - use smart scanning for Jpegs or blind scanning for other file - types.""" - ## assert isinstance(fh, file) - if self.fileIsJpeg(fh): - self.log("File is Jpeg, proceeding with JpegScan") - return self.jpegScan(fh) - else: - self.log("File not a JPEG, trying BlindScan") - return self.blindScan(fh) - - def fileIsJpeg(self, fh): #OK# - """Checks to see if this file is a Jpeg/JFIF or not. Will reset - the file position back to 0 after it's done in either case.""" - - # reset to beginning just in case - ## assert isinstance(fh, file) - assert duck_typed(fh, ['read', 'seek']) - fh.seek(0, 0) - if debugMode > 0: - self.log("Opening 16 bytes of file:\n"); - dump = fh.read(16) - debug(3, self.hexDump(dump)) - fh.seek(0, 0) - # check start of file marker - ered = False - try: - (ff, soi) = fh.read(2) - if not (ord(ff) == 0xff and ord(soi) == 0xd8): ered = False - else: - # now check for APP0 marker. I'll assume that anything with a SOI - # followed by APP0 is "close enough" for our purposes. (We're not - # dinking with image data, so anything following the Jpeg tagging - # system should work.) 
- (ff, app0) = fh.read(2) - if not (ord(ff) == 0xff): ered = False - else: ered = True - finally: - # reset to beginning of file - fh.seek(0, 0) - return ered - - c_marker_err = {0: "Marker scan failed", - 0xd9: "Marker scan hit end of image marker", - 0xda: "Marker scan hit start of image data"} - def jpegScan(self, fh): #OK# - """Assuming the file is a Jpeg (see above), this will scan through - the markers looking for the APP13 marker, where IPTC/IIM data - should be found. While this isn't a formally defined standard, all - programs have (supposedly) adopted Adobe's technique of putting - the data in APP13.""" - # Skip past start of file marker - ## assert isinstance(fh, file) - try: (ff, soi) = self.readExactly(fh, 2) - except EOFException: return None - - if not (ord(ff) == 0xff and ord(soi) == 0xd8): - self.error = "JpegScan: invalid start of file" - self.log(self.error) - return None - # Scan for the APP13 marker which will contain our IPTC info (I hope). - while 1: - err = None - marker = self.jpegNextMarker(fh) - if ord(marker) == 0xed: break #237 - - err = self.c_marker_err.get(ord(marker), None) - if err is None and self.jpegSkipVariable(fh) == 0: - err = "JpegSkipVariable failed" - if err is not None: - self.error = err - self.log(err) - return None - - # If were's here, we must have found the right marker. Now - # BlindScan through the data. - return self.blindScan(fh, MAX=self.jpegGetVariableLength(fh)) - - def jpegNextMarker(self, fh): #OK# - """Scans to the start of the next valid-looking marker. Return - value is the marker id.""" - - ## assert isinstance(fh, file) - # Find 0xff byte. We should already be on it. - try: byte = self.readExactly(fh, 1) - except EOFException: return None - - while ord(byte) != 0xff: - self.log("JpegNextMarker: warning: bogus stuff in Jpeg file"); - try: byte = self.readExactly(fh, 1) - except EOFException: return None - # Now skip any extra 0xffs, which are valid padding. 
- while 1: - try: byte = self.readExactly(fh, 1) - except EOFException: return None - if ord(byte) != 0xff: break - - # byte should now contain the marker id. - self.log("JpegNextMarker: at marker %02X (%d)" % (ord(byte), ord(byte))) - return byte - - def jpegGetVariableLength(self, fh): #OK# - """Gets length of current variable-length section. File position - at start must be on the marker itself, e.g. immediately after call - to JPEGNextMarker. File position is updated to just past the - length field.""" - ## assert isinstance(fh, file) - try: length = unpack('!H', self.readExactly(fh, 2))[0] - except EOFException: return 0 - self.log('JPEG variable length: %d' % length) - - # Length includes itself, so must be at least 2 - if length < 2: - self.log("JPEGGetVariableLength: erroneous JPEG marker length") - return 0 - return length-2 - - def jpegSkipVariable(self, fh, rSave=None): #OK# - """Skips variable-length section of Jpeg block. Should always be - called between calls to JpegNextMarker to ensure JpegNextMarker is - at the start of data it can properly parse.""" - - ## assert isinstance(fh, file) - # Get the marker parameter length count - length = self.jpegGetVariableLength(fh) - if length == 0: return None - - # Skip remaining bytes - if rSave is not None or debugMode > 0: - try: temp = self.readExactly(fh, length) - except EOFException: - self.log("JpegSkipVariable: read failed while skipping var data"); - return None - # prints out a heck of a lot of stuff - # self.hexDump(temp) - else: - # Just seek - try: self.seekExactly(fh, length) - except EOFException: - self.log("JpegSkipVariable: read failed while skipping var data"); - return None - - return (rSave is not None and [temp] or [True])[0] - - c_charset = {100: 'iso8859_1', 101: 'iso8859_2', 109: 'iso8859_3', - 110: 'iso8859_4', 111: 'iso8859_5', 125: 'iso8859_7', - 127: 'iso8859_6', 138: 'iso8859_8', - 196: 'utf_8'} - c_charset_r = dict([(v, k) for k, v in c_charset.items()]) - def blindScan(self, fh, 
MAX=8192): #OK# - """Scans blindly to first IIM Record 2 tag in the file. This - method may or may not work on any arbitrary file type, but it - doesn't hurt to check. We expect to see this tag within the first - 8k of data. (This limit may need to be changed or eliminated - depending on how other programs choose to store IIM.)""" - - ## assert isinstance(fh, file) - assert duck_typed(fh, 'read') - offset = 0 - # keep within first 8192 bytes - # NOTE: this may need to change - self.log('blindScan: starting scan, max length %d' % MAX) - - # start digging - while offset <= MAX: - try: temp = self.readExactly(fh, 1) - except EOFException: - self.log("BlindScan: hit EOF while scanning"); - return None - # look for tag identifier 0x1c - if ord(temp) == 0x1c: - # if we found that, look for record 2, dataset 0 - # (record version number) - (record, dataset) = fh.read(2) - if ord(record) == 1 and ord(dataset) == 90: - # found character set's record! - try: - temp = self.readExactly(fh, self.jpegGetVariableLength(fh)) - self.inp_charset = self.c_charset.get(unpack('!H', temp)[0], - sys_enc) - self.log("BlindScan: found character set '%s' at offset %d" - % (self.inp_charset, offset)) - except EOFException: - pass - - elif ord(record) == 2: - # found it. seek to start of this tag and return. - self.log("BlindScan: found IIM start at offset %d" % offset); - try: self.seekExactly(fh, -3) # seek rel to current position - except EOFException: - return None - return offset - else: - # didn't find it. back up 2 to make up for - # those reads above. - try: self.seekExactly(fh, -2) # seek rel to current position - except EOFException: return None - - # no tag, keep scanning - offset += 1 - - return False - - def collectIIMInfo(self, fh): #OK# - """Assuming file is seeked to start of IIM data (using above), - this reads all the data into our object's hashes""" - # NOTE: file should already be at the start of the first - # IPTC code: record 2, dataset 0. 
- ## assert isinstance(fh, file) - assert duck_typed(fh, 'read') - while 1: - try: header = self.readExactly(fh, 5) - except EOFException: return None - - (tag, record, dataset, length) = unpack("!BBBH", header) - # bail if we're past end of IIM record 2 data - if not (tag == 0x1c and record == 2): return None - - alist = {'tag': tag, 'record': record, 'dataset': dataset, - 'length': length} - debug(1, '\n'.join(['%s\t: %s' % (k, v) for k, v in alist.items()])) - value = fh.read(length) - - try: value = str(value, encoding=self.inp_charset, errors='strict') - except: - self.log('Data "%s" is not in encoding %s!' % (value, self.inp_charset)) - value = str(value, encoding=self.inp_charset, errors='replace') - - # try to extract first into _listdata (keywords, categories) - # and, if unsuccessful, into _data. Tags which are not in the - # current IIM spec (version 4) are currently discarded. - if dataset in self._data and isinstance(self._data[dataset], list): - self._data[dataset] += [value] - elif dataset != 0: - self._data[dataset] = value - - ####################################################################### - # File Saving - ####################################################################### - - def jpegCollectFileParts(self, fh, discardAppParts=False): - """Collects all pieces of the file except for the IPTC info that - we'll replace when saving. Returns the stuff before the info, - stuff after, and the contents of the Adobe Resource Block that the - IPTC data goes in. 
Returns None if a file parsing error occured.""" - - ## assert isinstance(fh, file) - assert duck_typed(fh, ['seek', 'read']) - adobeParts = '' - start = '' - - # Start at beginning of file - fh.seek(0, 0) - # Skip past start of file marker - (ff, soi) = fh.read(2) - if not (ord(ff) == 0xff and ord(soi) == 0xd8): - self.error = "JpegScan: invalid start of file" - self.log(self.error) - return None - - # Begin building start of file - start += pack("BB", 0xff, 0xd8) - - # Get first marker in file. This will be APP0 for JFIF or APP1 for - # EXIF. - marker = self.jpegNextMarker(fh) - app0data = '' - app0data = self.jpegSkipVariable(fh, app0data) - if app0data is None: - self.error = 'jpegSkipVariable failed 01' - self.log(error) - return None - - if ord(marker) == 0xe0 or not discardAppParts: - # Always include APP0 marker at start if it's present. - start += pack('BB', 0xff, ord(marker)) - # Remember that the length must include itself (2 bytes) - start += pack('!H', len(app0data)+2) - start += app0data - else: - # Manually insert APP0 if we're trashing application parts, since - # all JFIF format images should start with the version block. - debug(2, 'discardAppParts=', discardAppParts) - start += pack("BB", 0xff, 0xe0) - start += pack("!H", 16) # length (including these 2 bytes) - start += "JFIF" # format - start += pack("BB", 1, 2) # call it version 1.2 (current JFIF) - start += pack('8B', 0) # zero everything else - - # Now scan through all markers in file until we hit image data or - # IPTC stuff. 
- end = '' - while 1: - marker = self.jpegNextMarker(fh) - if marker is None or ord(marker) == 0: - self.error = "Marker scan failed" - self.log(self.error) - return None - # Check for end of image - elif ord(marker) == 0xd9: - self.log("JpegCollectFileParts: saw end of image marker") - end += pack("BB", 0xff, ord(marker)) - break - # Check for start of compressed data - elif ord(marker) == 0xda: - self.log("JpegCollectFileParts: saw start of compressed data") - end += pack("BB", 0xff, ord(marker)) - break - partdata = '' - partdata = self.jpegSkipVariable(fh, partdata) - if not partdata: - self.error = "JpegSkipVariable failed 02" - self.log(self.error) - return None - partdata = str(partdata) - - # Take all parts aside from APP13, which we'll replace - # ourselves. - if (discardAppParts and ord(marker) >= 0xe0 and ord(marker) <= 0xef): - # Skip all application markers, including Adobe parts - adobeParts = '' - elif ord(marker) == 0xed: - # Collect the adobe stuff from part 13 - adobeParts = self.collectAdobeParts(partdata) - break - else: - # Append all other parts to start section - start += pack("BB", 0xff, ord(marker)) - start += pack("!H", len(partdata) + 2) - start += partdata - - # Append rest of file to end - while 1: - buff = fh.read() - if buff is None or len(buff) == 0: break - end += buff - - return (start, end, adobeParts) - - def collectAdobeParts(self, data): - """Part APP13 contains yet another markup format, one defined by - Adobe. See"File Formats Specification" in the Photoshop SDK - (avail from www.adobe.com). 
We must take - everything but the IPTC data so that way we can write the file back - without losing everything else Photoshop stuffed into the APP13 - block.""" - assert isinstance(data, str) - length = len(data) - offset = 0 - out = '' - # Skip preamble - offset = len('Photoshop 3.0 ') - # Process everything - while offset < length: - # Get OSType and ID - (ostype, id1, id2) = unpack("!LBB", data[offset:offset+6]) - offset += 6 - - # Get pascal string - stringlen = unpack("B", data[offset:offset+1])[0] - offset += 1 - string = data[offset:offset+stringlen] - offset += stringlen - - # round up if odd - if (stringlen % 2 != 0): offset += 1 - # there should be a null if string len is 0 - if stringlen == 0: offset += 1 - - # Get variable-size data - size = unpack("!L", data[offset:offset+4])[0] - offset += 4 - - var = data[offset:offset+size] - offset += size - if size % 2 != 0: offset += 1 # round up if odd - - # skip IIM data (0x0404), but write everything else out - if not (id1 == 4 and id2 == 4): - out += pack("!LBB", ostype, id1, id2) - out += pack("B", stringlen) - out += string - if stringlen == 0 or stringlen % 2 != 0: out += pack("B", 0) - out += pack("!L", size) - out += var - if size % 2 != 0 and len(out) % 2 != 0: out += pack("B", 0) - - return out - - def _enc(self, text): - """Recodes the given text from the old character set to utf-8""" - res = text - out_charset = (self.out_charset is None and [self.inp_charset] - or [self.out_charset])[0] - if isinstance(text, str): res = text.encode(out_charset) - elif isinstance(text, str): - try: res = str(text, encoding=self.inp_charset).encode(out_charset) - except: - self.log("_enc: charset %s is not working for %s" - % (self.inp_charset, text)) - res = str(text, encoding=self.inp_charset, errors='replace' - ).encode(out_charset) - elif isinstance(text, (list, tuple)): - res = type(text)(list(map(self._enc, text))) - return res - - def packedIIMData(self): - """Assembles and returns our _data and _listdata into 
IIM format for - embedding into an image.""" - out = '' - (tag, record) = (0x1c, 0x02) - # Print record version - # tag - record - dataset - len (short) - 4 (short) - out += pack("!BBBHH", tag, record, 0, 2, 4) - - debug(3, self.hexDump(out)) - # Iterate over data sets - for dataset, value in self._data.items(): - if len(value) == 0: continue - if not (dataset in c_datasets or isinstance(dataset, int)): - self.log("PackedIIMData: illegal dataname '%s' (%d)" - % (c_datasets[dataset], dataset)) - continue - value = self._enc(value) - #~ print value - if not isinstance(value, list): - value = str(value) - out += pack("!BBBH", tag, record, dataset, len(value)) - out += value - else: - for v in map(str, value): - out += pack("!BBBH", tag, record, dataset, len(v)) - out += v - - return out - - def photoshopIIMBlock(self, otherparts, data): - """Assembles the blob of Photoshop "resource data" that includes our - fresh IIM data (from PackedIIMData) and the other Adobe parts we - found in the file, if there were any.""" - out = '' - assert isinstance(data, str) - resourceBlock = "Photoshop 3.0" - resourceBlock += pack("B", 0) - # Photoshop identifier - resourceBlock += "8BIM" - # 0x0404 is IIM data, 00 is required empty string - resourceBlock += pack("BBBB", 0x04, 0x04, 0, 0) - # length of data as 32-bit, network-byte order - resourceBlock += pack("!L", len(data)) - # Now tack data on there - resourceBlock += data - # Pad with a blank if not even size - if len(data) % 2 != 0: resourceBlock += pack("B", 0) - # Finally tack on other data - if otherparts is not None: resourceBlock += otherparts - - out += pack("BB", 0xff, 0xed) # Jpeg start of block, APP13 - out += pack("!H", len(resourceBlock) + 2) # length - out += resourceBlock - - return out - - ####################################################################### - # Helpers, docs - ####################################################################### - - def log(self, string): - """log: just prints a message to 
STDERR if debugMode is on.""" - if debugMode > 0: - sys.stderr.write("**IPTC** %s\n" % string) - - def hexDump(self, dump): - """Very helpful when debugging.""" - length = len(dump) - P = lambda z: ((ord(z) >= 0x21 and ord(z) <= 0x7e) and [z] or ['.'])[0] - ROWLEN = 18 - ered = '\n' - for j in range(0, length/ROWLEN + int(length%ROWLEN>0)): - row = dump[j*ROWLEN:(j+1)*ROWLEN] - ered += ('%02X '*len(row) + ' '*(ROWLEN-len(row)) + '| %s\n') % \ - tuple(list(map(ord, row)) + [''.join(map(P, row))]) - return ered - - def jpegDebugScan(filename): - """Also very helpful when debugging.""" - assert isinstance(filename, str) and os.path.isfile(filename) - fh = file(filename, 'wb') - if not fh: raise Exception("Can't open %s" % filename) - - # Skip past start of file marker - (ff, soi) = fh.read(2) - if not (ord(ff) == 0xff and ord(soi) == 0xd8): - self.log("JpegScan: invalid start of file") - else: - # scan to 0xDA (start of scan), dumping the markers we see between - # here and there. - while 1: - marker = self.jpegNextMarker(fh) - if ord(marker) == 0xda: break - - if ord(marker) == 0: - self.log("Marker scan failed") - break - elif ord(marker) == 0xd9: - self.log("Marker scan hit end of image marker") - break - - if not self.jpegSkipVariable(fh): - self.log("JpegSkipVariable failed") - return None - - self._closefh(fh) - -if __name__ == '__main__': - if len(sys.argv) > 1: - info = IPTCInfo(sys.argv[1],True) - info.keywords = ['test'] - info.supplementalCategories = [] - info.contacts = [] - print("info = %s\n%s" % (info,"="*30), file=sys.stderr) - info.save() diff --git a/test/rudolph_vogt/header_corr.py b/test/rudolph_vogt/header_corr.py deleted file mode 100644 index 32e783d..0000000 --- a/test/rudolph_vogt/header_corr.py +++ /dev/null @@ -1,67 +0,0 @@ -#!/usr/bin/env python - -import sys, os -sys.path.insert(0, os.path.join(os.path.dirname(__file__), os.pardir, os.pardir)) - -import iptcinfo - -def checkrefid(filename,fileobj,ncounter): - - """ - 
---------------------------------------------------- - write clean header for refid update - ---------------------------------------------------- - """ - - nDisplay = 0 - - #~ if chkfile(filename): - info = iptcinfo.IPTCInfo(filename,force=True) - - if len(info.data) > 3: - if info.data['reference number'] >= 0 or info.data['reference number'] != None: - ldigit = info.data['reference number'].isdigit() - if ldigit: - nDisplay = 1 - else: - nDisplay = 2 - info.keywords = [] - info.supplementalCategories = [] - info.contacts = [] - info.data['reference number'] = [0] - info.save() - else: - nDisplay = 3 - info.keywords = [] - info.supplementalCategories = [] - info.contacts = [] - info.data['reference number'] = [0] - info.save() - - print("number.... ",ncounter , filename) - - if nDisplay == 2 or nDisplay == 3: - try: - info = iptcinfo.IPTCInfo(filename) - fileobj.writelines('"' + str(nDisplay) + '","' + str(ncounter) + '","' + str(info.data['reference number']) + '","' + filename + '"' + "\n") - except: - fileobj.writelines('"' + str(nDisplay) + '","' + str(ncounter) + '","000000","' + filename + '"' + "\n") - elif nDisplay == 1: - fileobj.writelines('"' + str(nDisplay) + '","' + str(ncounter) + '","' + str(info.data['reference number']) + '","' + filename + '"' + "\n") - - else: - fileobj.writelines('"DONT EXIST","' + filename + '"' + "\n") - - if nDisplay == 0: - fileobj.writelines('"' + str(nDisplay) + '","' + str(ncounter) + '","000000","' + filename + '"' + "\n") - return - -if '__main__' == __name__: - checkrefid('test.jpg', sys.stdout, 100) - - -## -## -IPTC:objectpreviewfileformat=0 - - -## diff --git a/upl.sh b/upl.sh deleted file mode 100755 index 5b443ac..0000000 --- a/upl.sh +++ /dev/null @@ -1,17 +0,0 @@ -#!/bin/sh -VERSION=$(sed -ne '/^__version__/ { s/^[^0-9]*//;s/. *$//p }' iptcinfo.py) -echo VERSION=$VERSION -hg tags | grep -q $VERSION || { - echo "tagging $VERSION" - hg tag "iptcinfo-$VERSION" || exit 2 -} -echo 'hg push...' 
&& hg push \ -&& echo 'hg push bitbucket...' && hg push bitbucket \ -&& echo 'python setup.py register...' && python setup.py -v register \ -&& echo 'python setup.py sdist upload...' \ -&& python setup.py sdist -d dist upload && { - FILE=dist/IPTCInfo-${VERSION}.tar.gz - echo "scp $FILE gho:html/python/..." - scp -p $FILE gthomas@gthomas.homelinux.org:html/python/ - curl -T "$FILE" ftp://gthomas@ftp.fw.hu/gthomas/python/ -}