@@ -20,7 +20,7 @@ use strict;
20
20
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21
21
# or visit http://www.gnu.org/copyleft/gpl.html
22
22
#
23
- # 20051121 raf <raf@raf.org>
23
+ # 20051129 raf <raf@raf.org>
24
24
25
25
=head1 NAME
26
26
@@ -59,19 +59,19 @@ I<textmail> - mail filter to replace MS Word/HTML attachments with plain text
59
59
60
60
=head1 DESCRIPTION
61
61
62
- I<textmail > filters a mail message, replacing MS Word, MS Excel, HTML, RTF
63
- and PDF attachments with the plain text contained therein. By default, the
64
- following attachments are also deleted: image, audio, video and MS Windows
65
- executables. MS winmail.dat attachments are replaced by their contents which
66
- are then replaced by text or deleted in the same fashion. Any of these
67
- actions can be suppressed with the command line options. Mail headers can
68
- also be selectively deleted.
62
+ I<textmail> filters a mail message or mbox, replacing MS Word, MS
63
+ Excel, HTML, RTF and PDF attachments with the plain text contained therein.
64
+ By default, the following attachments are also deleted: image, audio, video
65
+ and MS Windows executables. MS C<winmail.dat> attachments are replaced by
66
+ any attachments contained therein which are then replaced by text or deleted
67
+ in the same fashion. Any of these actions can be suppressed with the command
68
+ line options. Mail headers can also be selectively deleted.
69
69
70
70
This is useful for increasing the accessibility of mail messages (by
71
71
reducing their dependence on proprietary file formats), for dramatically
72
72
reducing their size (and the time it takes to download them and the time it
73
73
takes to read them), and for dramatically reducing the risk of mail-borne
74
- viruses) . Its intended use is as a preprocessor for mailing lists. This is
74
+ viruses. Its intended use is as a preprocessor for mailing lists. This is
75
75
more friendly than a strict "No Attachments" policy.
76
76
77
77
=head1 OPTIONS
@@ -256,10 +256,11 @@ documents.
256
256
Whenever I<textmail> is unable to translate any attachment into text, it
257
257
will leave the attachment intact. This happens when the requisite
258
258
translation software can't be found, when it runs but returns an error code,
259
- and when it produces an empty file. This option causes the empty translation
260
- to take the place of the original attachment. Only the name of the
261
- attachment is preserved. This is needed to ensure plain text even in the
262
- face of an MS Word document that contains no text (e.g. only images).
259
+ and when it produces an empty file. It also happens when C<winmail.dat>
260
+ attachments are corrupt. This option causes the empty translation to take
261
+ the place of the original attachment. Only the name of the attachment is
262
+ preserved. This is needed to ensure plain text even in the face of an MS
263
+ Word document that contains no text (e.g. only images).
263
264
264
265
=item C<-?>
265
266
@@ -342,12 +343,12 @@ I<xls2csv(1)>,
342
343
I<lynx(1)>,
343
344
I<pdftotext(1)>,
344
345
I<pod2man(1)>,
345
- I<pod2html(1) >
346
+ I<pod2html(1)>,
346
347
C<http://raf.org/minimail/>
347
348
348
349
=head1 AUTHOR
349
350
350
- 20051121 raf <raf@raf.org>
351
+ 20051129 raf <raf@raf.org>
351
352
352
353
=head1 URL
353
354
@@ -389,11 +390,13 @@ sub help
389
390
" -f - On translation error, keep translation, not original\n " ,
390
391
" -? - Print paths of helper applications then exit\n " ,
391
392
" \n " ,
392
- " Filters a mail message, replacing MS Word, MS Excel, HTML, RTF and PDF\n " ,
393
- " attachments with the plain text contained therein. By default, the\n " ,
394
- " following attachments are also deleted: image, audio, video and MS\n " ,
395
- " Windows executables. MS winmail.dat attachments are replaced by their\n " ,
396
- " contents which are then replaced by text or deleted in the same fashion.\n " ;
393
+ " Filters a mail message or mbox, replacing MS Word, MS Excel, HTML, RTF and PDF\n " ,
394
+ " attachments with the plain text contained therein. By default, the following\n " ,
395
+ " attachments are also deleted: image, audio, video and MS Windows executables.\n " ,
396
+ " MS winmail.dat attachments are replaced by any attachments contained therein\n " ,
397
+ " which are then replaced by text or deleted in the same fashion. Any of these\n " ,
398
+ " actions can be suppressed with the command line options. Mail headers can also\n " ,
399
+ " be selectively deleted.\n " ;
397
400
exit ;
398
401
}
399
402
@@ -811,7 +814,7 @@ sub decode_quoted_printable
811
814
my $quoted = shift;
812
815
$quoted =~ tr/\x00 -\x08\x0b -\x0c\x0e -\x19\x7f -\xff //d;
813
816
$quoted =~ s/=\n //g;
814
- $quoted =~ s/=([0-9A-Fa-z ]{2})/chr hex $1 /eg;
817
+ $quoted =~ s/=([0-9A-Fa-f ]{2})/chr hex $1 /eg;
815
818
return $quoted ;
816
819
}
817
820
@@ -831,7 +834,7 @@ sub add_mimetypes
831
834
832
835
while (<M>)
833
836
{
834
- s/#.*$/ /, s/^\s +//, s/\s +$/ /, next unless $_ ;
837
+ s/#.*$/ /, s/^\s +//, s/\s +$/ /; next unless $_ ;
835
838
my ($mimetype , $ext ) = /^(\S +)\s +(.*)$/ ; next unless $ext ;
836
839
$mimetype {$_ } = $mimetype for split /\s +/, $ext ;
837
840
}
@@ -847,18 +850,22 @@ sub ATTACH_DATA { 0x0006800f }
847
850
sub ATTACH_FILENAME { 0x00018010 }
848
851
sub ATTACH_RENDDATA { 0x00069002 }
849
852
sub ATTACH_MODIFIED { 0x00038013 }
850
- sub VERSION { 0x00089006 }
851
- my $data ; my @attachment ; my $attachment ; my $pos ;
853
+ my $data ; my @attachment ; my $attachment ; my $pos ; my $badtnef ;
852
854
853
855
sub winmail
854
856
{
855
- sub read_version
857
+ sub read_message_attribute
856
858
{
857
859
my $type = unpack 'C', substr $data , $pos , 1;
858
- return unless defined $type && $type == MESSAGE;
859
- my $version = unpack 'V', substr $data , $pos + 1, 4;
860
- return unless $version == VERSION; $pos += 13;
860
+ return 0 unless defined $type && $type == MESSAGE; ++$pos ;
861
+ my $id = unpack 'V', substr $data , $pos , 4; $pos += 4;
862
+ my $len = unpack 'V', substr $data , $pos , 4; $pos += 4;
863
+ ++$badtnef , return 0 if $pos + $len > length $data ;
864
+ my $buf = substr $data , $pos , $len ; $pos += $len ;
861
865
my $chk = unpack 'v', substr $data , $pos , 2; $pos += 2;
866
+ my $tot = unpack '%16C*', $buf ;
867
+ ++$badtnef unless $chk == $tot ;
868
+ return $chk == $tot ;
862
869
}
863
870
864
871
sub read_attribute_message_class
@@ -868,24 +875,11 @@ sub winmail
868
875
my $id = unpack 'V', substr $data , $pos + 1, 4;
869
876
return unless $id == MESSAGE_CLASS; $pos += 5;
870
877
my $len = unpack 'V', substr $data , $pos , 4; $pos += 4;
871
- return 0 if $pos + $len > length $data ;
872
- my $buf = substr( $data , $pos , $len ) ; $pos += $len ;
878
+ ++ $badtnef , return if $pos + $len > length $data ;
879
+ my $buf = substr $data , $pos , $len ; $pos += $len ;
873
880
my $chk = unpack 'v', substr $data , $pos , 2; $pos += 2;
874
881
my $tot = unpack '%16C*', $buf ;
875
- return $chk == $tot ;
876
- }
877
-
878
- sub read_message_attribute
879
- {
880
- my $type = unpack 'C', substr $data , $pos , 1;
881
- return 0 unless defined $type && $type == MESSAGE; ++$pos ;
882
- my $id = unpack 'V', substr $data , $pos , 4; $pos += 4;
883
- my $len = unpack 'V', substr $data , $pos , 4; $pos += 4;
884
- return 0 if $pos + $len > length $data ;
885
- my $buf = substr($data , $pos , $len ); $pos += $len ;
886
- my $chk = unpack 'v', substr $data , $pos , 2; $pos += 2;
887
- my $tot = unpack '%16C*', $buf ;
888
- return $chk == $tot ;
882
+ ++$badtnef unless $chk == $tot ;
889
883
}
890
884
891
885
sub read_attachment_attribute
@@ -895,11 +889,11 @@ sub winmail
895
889
my $id = unpack 'V', substr $data , $pos , 4; $pos += 4;
896
890
push @attachment , $attachment = {} if $id == ATTACH_RENDDATA;
897
891
my $len = unpack 'V', substr $data , $pos , 4; $pos += 4;
898
- return 0 if $pos + $len > length $data ;
899
- my $buf = substr( $data , $pos , $len ) ; $pos += $len ;
892
+ ++ $badtnef , return 0 if $pos + $len > length $data ;
893
+ my $buf = substr $data , $pos , $len ; $pos += $len ;
900
894
my $chk = unpack 'v', substr $data , $pos , 2; $pos += 2;
901
895
my $tot = unpack '%16C*', $buf ;
902
- return 0 unless $chk == $tot ;
896
+ ++ $badtnef , return 0 unless $chk == $tot ;
903
897
$attachment ->{body} = $buf , $attachment ->{size} = length $buf if $id == ATTACH_DATA;
904
898
$buf =~ s/\x00 +$/ /, $attachment ->{filename} = $buf , $attachment ->{type} = $mimetype {($attachment ->{filename} =~ /\. ([^.]+)$/ ) || 'other'} || 'application/octet-stream' if $id == ATTACH_FILENAME && !exists $attachment ->{filename};
905
899
my $fname ; $attachment ->{filename} = $fname , $attachment ->{type} = $mimetype {($attachment ->{filename} =~ /\. ([^.]+)$/ ) || 'other'} || 'application/octet-stream' if $id == ATTACH_ATTACHMENT && ($fname = realname($buf ));
@@ -919,18 +913,17 @@ sub winmail
919
913
}
920
914
921
915
my $m = shift;
922
- $pos = 0; $data = body($m ); @attachment = ();
916
+ $pos = 0; $data = body($m ); @attachment = (); $badtnef = 0;
923
917
my $signature = unpack 'V', substr($data , $pos , 4); $pos += 4;
924
918
return $m unless $signature == 0x223E9F78;
925
919
my $key = unpack 'v', substr($data , $pos , 2); $pos += 2;
926
920
my $type = unpack 'C', substr($data , $pos , 1);
927
921
return $m unless $type == MESSAGE || $type == ATTACHMENT;
928
- read_version();
929
922
do {} while read_message_attribute();
930
923
read_attribute_message_class();
931
924
do {} while read_message_attribute();
932
925
do {} while read_attachment_attribute();
933
- return map { newmail(%$_ ) } @attachment ;
926
+ return ( $badtnef ) ? $m : map { newmail(%$_ ) } @attachment ;
934
927
}
935
928
936
929
my %opt ;
@@ -994,6 +987,8 @@ formail(sub { <> }, sub
994
987
995
988
rmdir $tmp or system "rm -rf $tmp ";
996
989
990
+ BEGIN { $SIG{INT} = $SIG{QUIT} = $SIG{TERM} = sub { rmdir $tmp or system "rm -rf $tmp " if defined $tmp; exit 1 } } # On INT/QUIT/TERM: remove the temp dir (if one was created), then exit; a handler that merely returns would let the interrupted run carry on without its temp dir
991
+
997
992
# Print paths of helper applications then exit
998
993
999
994
sub paths
@@ -1116,8 +1111,12 @@ sub textmail
1116
1111
1117
1112
if ($remove_tnef && isa($parts [$i ], qr/ms-tnef/i, qr/winmail\. dat$/i))
1118
1113
{
1119
- splice @parts , $i , 1, winmail($parts [$i ]);
1120
- --$i , next;
1114
+ my @a = winmail($parts [$i ]);
1115
+ my $failed = @a == 1 && $a [0] == $parts [$i ];
1116
+ @a = () if $failed && $force ;
1117
+ splice @parts , $i , 1, @a ;
1118
+ --$i if !$failed || $force ;
1119
+ next;
1121
1120
}
1122
1121
1123
1122
# Remove images, audio, video, MS Windows executables, octet streams, application/*
0 commit comments