From a8afa4fa549733820f4a0abe7c6a2cd6ce5b2c19 Mon Sep 17 00:00:00 2001 From: raf Date: Tue, 29 Nov 2005 17:47:32 +1100 Subject: [PATCH] 20051129 - Fixed regexp for quoted-printable =xx values (was [0-9A-Fa-z]) - Fixed comment stripping when parsing /etc/mime.types - Fixed infinite loop on extremely corrupt winmail.dat attachments - Corrupt winmail.dat attachments are now left intact (unless -f) - Remove temp directory even when killed by a signal (int, quit, term) --- CHANGELOG | 8 +++++ textmail | 103 +++++++++++++++++++++++++++--------------------------- 2 files changed, 59 insertions(+), 52 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index a700f5d..c28db4f 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,11 @@ +20051129 + + - Fixed regexp for quoted-printable =xx values (was [0-9A-Fa-z]) + - Fixed comment stripping when parsing /etc/mime.types + - Fixed infinite loop on extremely corrupt winmail.dat attachments + - Corrupt winmail.dat attachments are now left intact (unless -f) + - Remove temp directory even when killed by a signal (int, quit, term) + 20051121 - Removed the -S option's argument (now only option is space or underscore) diff --git a/textmail b/textmail index 62575f1..de71edb 100755 --- a/textmail +++ b/textmail @@ -20,7 +20,7 @@ use strict; # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # or visit http://www.gnu.org/copyleft/gpl.html # -# 20051121 raf +# 20051129 raf =head1 NAME @@ -59,19 +59,19 @@ I<textmail> - mail filter to replace MS Word/HTML attachments with plain text =head1 DESCRIPTION -I<textmail> filters a mail message, replacing MS Word, MS Excel, HTML, RTF -and PDF attachments with the plain text contained therein. By default, the -following attachments are also deleted: image, audio, video and MS Windows -executables. MS winmail.dat attachments are replaced by their contents which -are then replaced by text or deleted in the same fashion. Any of these -actions can be suppressed with the command line options. 
Mail headers can -also be selectively deleted. +I<textmail> filters a mail message or mbox, replacing MS Word, MS +Excel, HTML, RTF and PDF attachments with the plain text contained therein. +By default, the following attachments are also deleted: image, audio, video +and MS Windows executables. MS C<winmail.dat> attachments are replaced by +any attachments contained therein which are then replaced by text or deleted +in the same fashion. Any of these actions can be suppressed with the command +line options. Mail headers can also be selectively deleted. This is useful for increasing the accessibility of mail messages (by reducing their dependence on proprietary file formats), for dramatically reducing their size (and the time it takes to download them and the time it takes to read them), and for dramatically reducing the risk of mail-borne -viruses). Its intended use is as a preprocessor for mailing lists. This is +viruses. Its intended use is as a preprocessor for mailing lists. This is more friendly than a strict "No Attachments" policy. =head1 OPTIONS @@ -256,10 +256,11 @@ documents. Whenever I<textmail> is unable to translate any attachment into text, it will leave the attachment intact. This happens when the requisite translation software can't be found, when it runs but returns an error code, -and when it produces an empty file. This option causes the empty translation -to take the place of the original attachment. Only the name of the -attachment is preserved. This is needed to ensure plain text even in the -face of an MS Word document that contains no text (e.g. only images). +and when it produces an empty file. It also happens when C<winmail.dat> +attachments are corrupt. This option causes the empty translation to take +the place of the original attachment. Only the name of the attachment is +preserved. This is needed to ensure plain text even in the face of an MS +Word document that contains no text (e.g. only images). 
=item C<-?> @@ -342,12 +343,12 @@ I, I, I, I, -I +I, C =head1 AUTHOR -20051121 raf +20051129 raf =head1 URL @@ -389,11 +390,13 @@ sub help " -f - On translation error, keep translation, not original\n", " -? - Print paths of helper applications then exit\n", "\n", - "Filters a mail message, replacing MS Word, MS Excel, HTML, RTF and PDF\n", - "attachments with the plain text contained therein. By default, the\n", - "following attachments are also deleted: image, audio, video and MS\n", - "Windows executables. MS winmail.dat attachments are replaced by their\n", - "contents which are then replaced by text or deleted in the same fashion.\n"; + "Filters a mail message or mbox, replacing MS Word, MS Excel, HTML, RTF and PDF\n", + "attachments with the plain text contained therein. By default, the following\n", + "attachments are also deleted: image, audio, video and MS Windows executables.\n", + "MS winmail.dat attachments are replaced by any attachments contained therein\n", + "which are then replaced by text or deleted in the same fashion. Any of these\n", + "actions can be suppressed with the command line options. 
Mail headers can also\n", + "be selectively deleted.\n"; exit; } @@ -811,7 +814,7 @@ sub decode_quoted_printable my $quoted = shift; $quoted =~ tr/\x00-\x08\x0b-\x0c\x0e-\x19\x7f-\xff//d; $quoted =~ s/=\n//g; - $quoted =~ s/=([0-9A-Fa-z]{2})/chr hex $1/eg; + $quoted =~ s/=([0-9A-Fa-f]{2})/chr hex $1/eg; return $quoted; } @@ -831,7 +834,7 @@ sub add_mimetypes while () { - s/#.*$//, s/^\s+//, s/\s+$//, next unless $_; + s/#.*$//, s/^\s+//, s/\s+$//; next unless $_; my ($mimetype, $ext) = /^(\S+)\s+(.*)$/; next unless $ext; $mimetype{$_} = $mimetype for split /\s+/, $ext; } @@ -847,18 +850,22 @@ sub ATTACH_DATA { 0x0006800f } sub ATTACH_FILENAME { 0x00018010 } sub ATTACH_RENDDATA { 0x00069002 } sub ATTACH_MODIFIED { 0x00038013 } -sub VERSION { 0x00089006 } -my $data; my @attachment; my $attachment; my $pos; +my $data; my @attachment; my $attachment; my $pos; my $badtnef; sub winmail { - sub read_version + sub read_message_attribute { my $type = unpack 'C', substr $data, $pos, 1; - return unless defined $type && $type == MESSAGE; - my $version = unpack 'V', substr $data, $pos + 1, 4; - return unless $version == VERSION; $pos += 13; + return 0 unless defined $type && $type == MESSAGE; ++$pos; + my $id = unpack 'V', substr $data, $pos, 4; $pos += 4; + my $len = unpack 'V', substr $data, $pos, 4; $pos += 4; + ++$badtnef, return 0 if $pos + $len > length $data; + my $buf = substr $data, $pos, $len; $pos += $len; my $chk = unpack 'v', substr $data, $pos, 2; $pos += 2; + my $tot = unpack '%16C*', $buf; + ++$badtnef unless $chk == $tot; + return $chk == $tot; } sub read_attribute_message_class @@ -868,24 +875,11 @@ sub winmail my $id = unpack 'V', substr $data, $pos + 1, 4; return unless $id == MESSAGE_CLASS; $pos += 5; my $len = unpack 'V', substr $data, $pos, 4; $pos += 4; - return 0 if $pos + $len > length $data; - my $buf = substr($data, $pos, $len); $pos += $len; + ++$badtnef, return if $pos + $len > length $data; + my $buf = substr $data, $pos, $len; $pos += $len; my 
$chk = unpack 'v', substr $data, $pos, 2; $pos += 2; my $tot = unpack '%16C*', $buf; - return $chk == $tot; - } - - sub read_message_attribute - { - my $type = unpack 'C', substr $data, $pos, 1; - return 0 unless defined $type && $type == MESSAGE; ++$pos; - my $id = unpack 'V', substr $data, $pos, 4; $pos += 4; - my $len = unpack 'V', substr $data, $pos, 4; $pos += 4; - return 0 if $pos + $len > length $data; - my $buf = substr($data, $pos, $len); $pos += $len; - my $chk = unpack 'v', substr $data, $pos, 2; $pos += 2; - my $tot = unpack '%16C*', $buf; - return $chk == $tot; + ++$badtnef unless $chk == $tot; } sub read_attachment_attribute @@ -895,11 +889,11 @@ sub winmail my $id = unpack 'V', substr $data, $pos, 4; $pos += 4; push @attachment, $attachment = {} if $id == ATTACH_RENDDATA; my $len = unpack 'V', substr $data, $pos, 4; $pos += 4; - return 0 if $pos + $len > length $data; - my $buf = substr($data, $pos, $len); $pos += $len; + ++$badtnef, return 0 if $pos + $len > length $data; + my $buf = substr $data, $pos, $len; $pos += $len; my $chk = unpack 'v', substr $data, $pos, 2; $pos += 2; my $tot = unpack '%16C*', $buf; - return 0 unless $chk == $tot; + ++$badtnef, return 0 unless $chk == $tot; $attachment->{body} = $buf, $attachment->{size} = length $buf if $id == ATTACH_DATA; $buf =~ s/\x00+$//, $attachment->{filename} = $buf, $attachment->{type} = $mimetype{($attachment->{filename} =~ /\.([^.]+)$/) || 'other'} || 'application/octet-stream' if $id == ATTACH_FILENAME && !exists $attachment->{filename}; my $fname; $attachment->{filename} = $fname, $attachment->{type} = $mimetype{($attachment->{filename} =~ /\.([^.]+)$/) || 'other'} || 'application/octet-stream' if $id == ATTACH_ATTACHMENT && ($fname = realname($buf)); @@ -919,18 +913,17 @@ sub winmail } my $m = shift; - $pos = 0; $data = body($m); @attachment = (); + $pos = 0; $data = body($m); @attachment = (); $badtnef = 0; my $signature = unpack 'V', substr($data, $pos, 4); $pos += 4; return $m unless 
$signature == 0x223E9F78; my $key = unpack 'v', substr($data, $pos, 2); $pos += 2; my $type = unpack 'C', substr($data, $pos, 1); return $m unless $type == MESSAGE || $type == ATTACHMENT; - read_version(); do {} while read_message_attribute(); read_attribute_message_class(); do {} while read_message_attribute(); do {} while read_attachment_attribute(); - return map { newmail(%$_) } @attachment; + return ($badtnef) ? $m : map { newmail(%$_) } @attachment; } my %opt; @@ -994,6 +987,8 @@ formail(sub { <> }, sub rmdir $tmp or system "rm -rf $tmp"; +BEGIN { $SIG{INT} = $SIG{QUIT} = $SIG{TERM} = sub { rmdir $tmp or system "rm -rf $tmp" if defined $tmp } } + # Print paths to help applications then exit sub paths @@ -1116,8 +1111,12 @@ sub textmail if ($remove_tnef && isa($parts[$i], qr/ms-tnef/i, qr/winmail\.dat$/i)) { - splice @parts, $i, 1, winmail($parts[$i]); - --$i, next; + my @a = winmail($parts[$i]); + my $failed = @a == 1 && $a[0] == $parts[$i]; + @a = () if $failed && $force; + splice @parts, $i, 1, @a; + --$i if !$failed || $force; + next; } # Remove images, audio, video, MS Windows executables, octet streams, application/*