Skip to content

Commit a8afa4f

Browse files
committed
20051129
- Fixed regexp for quoted-printable =xx values (was [0-9A-Fa-z]) - Fixed comment stripping when parsing /etc/mime.types - Fixed infinite loop on extremely corrupt winmail.dat attachments - Corrupt winmail.dat attachments are now left intact (unless -f) - Remove temp directory even when killed by a signal (int, quit, term)
1 parent 7e8e06c commit a8afa4f

File tree

2 files changed

+59
-52
lines changed

2 files changed

+59
-52
lines changed

CHANGELOG

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,11 @@
1+
20051129
2+
3+
- Fixed regexp for quoted-printable =xx values (was [0-9A-Fa-z])
4+
- Fixed comment stripping when parsing /etc/mime.types
5+
- Fixed infinite loop on extremely corrupt winmail.dat attachments
6+
- Corrupt winmail.dat attachments are now left intact (unless -f)
7+
- Remove temp directory even when killed by a signal (int, quit, term)
8+
19
20051121
210

311
- Removed the -S option's argument (now only option is space or underscore)

textmail

Lines changed: 51 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ use strict;
2020
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
2121
# or visit http://www.gnu.org/copyleft/gpl.html
2222
#
23-
# 20051121 raf <raf@raf.org>
23+
# 20051129 raf <raf@raf.org>
2424

2525
=head1 NAME
2626
@@ -59,19 +59,19 @@ I<textmail> - mail filter to replace MS Word/HTML attachments with plain text
5959
6060
=head1 DESCRIPTION
6161
62-
I<textmail> filters a mail message, replacing MS Word, MS Excel, HTML, RTF
63-
and PDF attachments with the plain text contained therein. By default, the
64-
following attachments are also deleted: image, audio, video and MS Windows
65-
executables. MS winmail.dat attachments are replaced by their contents which
66-
are then replaced by text or deleted in the same fashion. Any of these
67-
actions can be suppressed with the command line options. Mail headers can
68-
also be selectively deleted.
62+
I<textmail> filters a mail message or mbox, replacing MS Word, MS
63+
Excel, HTML, RTF and PDF attachments with the plain text contained therein.
64+
By default, the following attachments are also deleted: image, audio, video
65+
and MS Windows executables. MS C<winmail.dat> attachments are replaced by
66+
any attachments contained therein which are then replaced by text or deleted
67+
in the same fashion. Any of these actions can be suppressed with the command
68+
line options. Mail headers can also be selectively deleted.
6969
7070
This is useful for increasing the accessibility of mail messages (by
7171
reducing their dependence on proprietary file formats), for dramatically
7272
reducing their size (and the time it takes to download them and the time it
7373
takes to read them), and for dramatically reducing the risk of mail-borne
74-
viruses). Its intended use is as a preprocessor for mailing lists. This is
74+
viruses. Its intended use is as a preprocessor for mailing lists. This is
7575
more friendly than a strict "No Attachments" policy.
7676
7777
=head1 OPTIONS
@@ -256,10 +256,11 @@ documents.
256256
Whenever I<textmail> is unable to translate any attachment into text, it
257257
will leave the attachment intact. This happens when the requisite
258258
translation software can't be found, when it runs but returns an error code,
259-
and when it produces an empty file. This option causes the empty translation
260-
to take the place of the original attachment. Only the name of the
261-
attachment is preserved. This is needed to ensure plain text even in the
262-
face of an MS Word document that contains no text (e.g. only images).
259+
and when it produces an empty file. It also happens when C<winmail.dat>
260+
attachments are corrupt. This option causes the empty translation to take
261+
the place of the original attachment. Only the name of the attachment is
262+
preserved. This is needed to ensure plain text even in the face of an MS
263+
Word document that contains no text (e.g. only images).
263264
264265
=item C<-?>
265266
@@ -342,12 +343,12 @@ I<xls2csv(1)>,
342343
I<lynx(1)>,
343344
I<pdftotext(1)>,
344345
I<pod2man(1)>,
345-
I<pod2html(1)>
346+
I<pod2html(1)>,
346347
C<http://raf.org/minimail/>
347348
348349
=head1 AUTHOR
349350
350-
20051121 raf <raf@raf.org>
351+
20051129 raf <raf@raf.org>
351352
352353
=head1 URL
353354
@@ -389,11 +390,13 @@ sub help
389390
" -f - On translation error, keep translation, not original\n",
390391
" -? - Print paths of helper applications then exit\n",
391392
"\n",
392-
"Filters a mail message, replacing MS Word, MS Excel, HTML, RTF and PDF\n",
393-
"attachments with the plain text contained therein. By default, the\n",
394-
"following attachments are also deleted: image, audio, video and MS\n",
395-
"Windows executables. MS winmail.dat attachments are replaced by their\n",
396-
"contents which are then replaced by text or deleted in the same fashion.\n";
393+
"Filters a mail message or mbox, replacing MS Word, MS Excel, HTML, RTF and PDF\n",
394+
"attachments with the plain text contained therein. By default, the following\n",
395+
"attachments are also deleted: image, audio, video and MS Windows executables.\n",
396+
"MS winmail.dat attachments are replaced by any attachments contained therein\n",
397+
"which are then replaced by text or deleted in the same fashion. Any of these\n",
398+
"actions can be suppressed with the command line options. Mail headers can also\n",
399+
"be selectively deleted.\n";
397400
exit;
398401
}
399402

@@ -811,7 +814,7 @@ sub decode_quoted_printable
811814
my $quoted = shift;
812815
$quoted =~ tr/\x00-\x08\x0b-\x0c\x0e-\x19\x7f-\xff//d;
813816
$quoted =~ s/=\n//g;
814-
$quoted =~ s/=([0-9A-Fa-z]{2})/chr hex $1/eg;
817+
$quoted =~ s/=([0-9A-Fa-f]{2})/chr hex $1/eg;
815818
return $quoted;
816819
}
817820
@@ -831,7 +834,7 @@ sub add_mimetypes
831834
832835
while (<M>)
833836
{
834-
s/#.*$//, s/^\s+//, s/\s+$//, next unless $_;
837+
s/#.*$//, s/^\s+//, s/\s+$//; next unless $_;
835838
my ($mimetype, $ext) = /^(\S+)\s+(.*)$/; next unless $ext;
836839
$mimetype{$_} = $mimetype for split /\s+/, $ext;
837840
}
@@ -847,18 +850,22 @@ sub ATTACH_DATA { 0x0006800f }
847850
sub ATTACH_FILENAME { 0x00018010 }
848851
sub ATTACH_RENDDATA { 0x00069002 }
849852
sub ATTACH_MODIFIED { 0x00038013 }
850-
sub VERSION { 0x00089006 }
851-
my $data; my @attachment; my $attachment; my $pos;
853+
my $data; my @attachment; my $attachment; my $pos; my $badtnef;
852854
853855
sub winmail
854856
{
855-
sub read_version
857+
sub read_message_attribute
856858
{
857859
my $type = unpack 'C', substr $data, $pos, 1;
858-
return unless defined $type && $type == MESSAGE;
859-
my $version = unpack 'V', substr $data, $pos + 1, 4;
860-
return unless $version == VERSION; $pos += 13;
860+
return 0 unless defined $type && $type == MESSAGE; ++$pos;
861+
my $id = unpack 'V', substr $data, $pos, 4; $pos += 4;
862+
my $len = unpack 'V', substr $data, $pos, 4; $pos += 4;
863+
++$badtnef, return 0 if $pos + $len > length $data;
864+
my $buf = substr $data, $pos, $len; $pos += $len;
861865
my $chk = unpack 'v', substr $data, $pos, 2; $pos += 2;
866+
my $tot = unpack '%16C*', $buf;
867+
++$badtnef unless $chk == $tot;
868+
return $chk == $tot;
862869
}
863870
864871
sub read_attribute_message_class
@@ -868,24 +875,11 @@ sub winmail
868875
my $id = unpack 'V', substr $data, $pos + 1, 4;
869876
return unless $id == MESSAGE_CLASS; $pos += 5;
870877
my $len = unpack 'V', substr $data, $pos, 4; $pos += 4;
871-
return 0 if $pos + $len > length $data;
872-
my $buf = substr($data, $pos, $len); $pos += $len;
878+
++$badtnef, return if $pos + $len > length $data;
879+
my $buf = substr $data, $pos, $len; $pos += $len;
873880
my $chk = unpack 'v', substr $data, $pos, 2; $pos += 2;
874881
my $tot = unpack '%16C*', $buf;
875-
return $chk == $tot;
876-
}
877-
878-
sub read_message_attribute
879-
{
880-
my $type = unpack 'C', substr $data, $pos, 1;
881-
return 0 unless defined $type && $type == MESSAGE; ++$pos;
882-
my $id = unpack 'V', substr $data, $pos, 4; $pos += 4;
883-
my $len = unpack 'V', substr $data, $pos, 4; $pos += 4;
884-
return 0 if $pos + $len > length $data;
885-
my $buf = substr($data, $pos, $len); $pos += $len;
886-
my $chk = unpack 'v', substr $data, $pos, 2; $pos += 2;
887-
my $tot = unpack '%16C*', $buf;
888-
return $chk == $tot;
882+
++$badtnef unless $chk == $tot;
889883
}
890884
891885
sub read_attachment_attribute
@@ -895,11 +889,11 @@ sub winmail
895889
my $id = unpack 'V', substr $data, $pos, 4; $pos += 4;
896890
push @attachment, $attachment = {} if $id == ATTACH_RENDDATA;
897891
my $len = unpack 'V', substr $data, $pos, 4; $pos += 4;
898-
return 0 if $pos + $len > length $data;
899-
my $buf = substr($data, $pos, $len); $pos += $len;
892+
++$badtnef, return 0 if $pos + $len > length $data;
893+
my $buf = substr $data, $pos, $len; $pos += $len;
900894
my $chk = unpack 'v', substr $data, $pos, 2; $pos += 2;
901895
my $tot = unpack '%16C*', $buf;
902-
return 0 unless $chk == $tot;
896+
++$badtnef, return 0 unless $chk == $tot;
903897
$attachment->{body} = $buf, $attachment->{size} = length $buf if $id == ATTACH_DATA;
904898
$buf =~ s/\x00+$//, $attachment->{filename} = $buf, $attachment->{type} = $mimetype{($attachment->{filename} =~ /\.([^.]+)$/) || 'other'} || 'application/octet-stream' if $id == ATTACH_FILENAME && !exists $attachment->{filename};
905899
my $fname; $attachment->{filename} = $fname, $attachment->{type} = $mimetype{($attachment->{filename} =~ /\.([^.]+)$/) || 'other'} || 'application/octet-stream' if $id == ATTACH_ATTACHMENT && ($fname = realname($buf));
@@ -919,18 +913,17 @@ sub winmail
919913
}
920914
921915
my $m = shift;
922-
$pos = 0; $data = body($m); @attachment = ();
916+
$pos = 0; $data = body($m); @attachment = (); $badtnef = 0;
923917
my $signature = unpack 'V', substr($data, $pos, 4); $pos += 4;
924918
return $m unless $signature == 0x223E9F78;
925919
my $key = unpack 'v', substr($data, $pos, 2); $pos += 2;
926920
my $type = unpack 'C', substr($data, $pos, 1);
927921
return $m unless $type == MESSAGE || $type == ATTACHMENT;
928-
read_version();
929922
do {} while read_message_attribute();
930923
read_attribute_message_class();
931924
do {} while read_message_attribute();
932925
do {} while read_attachment_attribute();
933-
return map { newmail(%$_) } @attachment;
926+
return ($badtnef) ? $m : map { newmail(%$_) } @attachment;
934927
}
935928
936929
my %opt;
@@ -994,6 +987,8 @@ formail(sub { <> }, sub
994987
995988
rmdir $tmp or system "rm -rf $tmp";
996989
990+
# Remove the temporary directory when interrupted by INT, QUIT or TERM, and
# then actually terminate. The handlers are installed in a BEGIN block, so
# $tmp may not have been assigned yet when a signal arrives (hence the
# defined check).
BEGIN
{
	$SIG{INT} = $SIG{QUIT} = $SIG{TERM} = sub
	{
		# Perl passes the signal name (e.g. "INT") as the first argument
		my $signame = shift;

		# List-form system avoids handing $tmp to a shell for interpretation
		if (defined $tmp)
		{
			rmdir $tmp or system 'rm', '-rf', $tmp;
		}

		# A handler that simply returns lets the program carry on running
		# (now without its temp directory). Restore the default disposition
		# and re-raise the signal so that the process really terminates and
		# the parent sees the true cause of death.
		$SIG{$signame} = 'DEFAULT';
		kill $signame, $$;
	};
}
991+
997992
# Print paths to help applications then exit
998993
999994
sub paths
@@ -1116,8 +1111,12 @@ sub textmail
11161111
11171112
if ($remove_tnef && isa($parts[$i], qr/ms-tnef/i, qr/winmail\.dat$/i))
11181113
{
1119-
splice @parts, $i, 1, winmail($parts[$i]);
1120-
--$i, next;
1114+
my @a = winmail($parts[$i]);
1115+
my $failed = @a == 1 && $a[0] == $parts[$i];
1116+
@a = () if $failed && $force;
1117+
splice @parts, $i, 1, @a;
1118+
--$i if !$failed || $force;
1119+
next;
11211120
}
11221121
11231122
# Remove images, audio, video, MS Windows executables, octet streams, application/*

0 commit comments

Comments
 (0)