Return-Path: Delivered-To: apmail-incubator-spamassassin-cvs-archive@www.apache.org Received: (qmail 32877 invoked from network); 25 Jan 2004 03:17:02 -0000 Received: from daedalus.apache.org (HELO mail.apache.org) (208.185.179.12) by minotaur-2.apache.org with SMTP; 25 Jan 2004 03:17:02 -0000 Received: (qmail 67348 invoked by uid 500); 25 Jan 2004 03:16:44 -0000 Delivered-To: apmail-incubator-spamassassin-cvs-archive@incubator.apache.org Received: (qmail 67321 invoked by uid 500); 25 Jan 2004 03:16:44 -0000 Mailing-List: contact spamassassin-cvs-help@incubator.apache.org; run by ezmlm Precedence: bulk list-help: list-unsubscribe: list-post: Reply-To: "Spam Assassin Dev" Delivered-To: mailing list spamassassin-cvs@incubator.apache.org Received: (qmail 67308 invoked from network); 25 Jan 2004 03:16:44 -0000 Received: from unknown (HELO minotaur.apache.org) (209.237.227.194) by daedalus.apache.org with SMTP; 25 Jan 2004 03:16:44 -0000 Received: (qmail 32869 invoked by uid 65534); 25 Jan 2004 03:17:02 -0000 Date: 25 Jan 2004 03:17:02 -0000 Message-ID: <20040125031702.32868.qmail@minotaur.apache.org> From: felicity@apache.org To: spamassassin-cvs@incubator.apache.org Subject: svn commit: rev 6265 - incubator/spamassassin/trunk/lib/Mail/SpamAssassin X-Spam-Rating: daedalus.apache.org 1.6.2 0/1000/N X-Spam-Rating: minotaur-2.apache.org 1.6.2 0/1000/N Author: felicity Date: Sat Jan 24 19:17:01 2004 New Revision: 6265 Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/MsgContainer.pm incubator/spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm Log: merge the new html rendering with the old html rules ... Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/MsgContainer.pm ============================================================================== --- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/MsgContainer.pm (original) +++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/MsgContainer.pm Sat Jan 24 19:17:01 2004 @@ -229,6 +229,7 @@ if ( !exists $self->{'rendered'} ) { my $text = $self->decode(); + my $raw = length($text); # render text/html always, or any other text part as text/html based # on a heuristic which simulates a certain common mail client @@ -237,10 +238,27 @@ _html_near_start($1) ) ) { + $self->{'rendered_type'} = 'text/html'; my $html = Mail::SpamAssassin::HTML->new(); # object + my @lines = @{$html->html_render($text)}; $self->{rendered} = join('', @{$html->html_render($text)}); # rendered text $self->{html_results} = $html->get_results(); # needed in eval tests - $self->{'rendered_type'} = 'text/html'; + + my $space = 0; + $self->{html_results}{non_uri_len} = 0; + for my $line (@lines) { + $line = pack ('C0A*', $line); + $space += ($line =~ tr/ \t\n\r\x0b\xa0/ \t\n\r\x0b\xa0/); + $self->{html_results}{non_uri_len} += length($line); + for my $uri ($line =~ m/\b(URI:\S+)/g) { + $self->{html_results}{non_uri_len} -= length($uri); + } + } + $self->{html_results}{non_space_len} = $self->{html_results}{non_uri_len} - $space; + $self->{html_results}{ratio} = ($raw - $self->{html_results}{non_uri_len}) / $raw; + if (exists $self->{html_results}{total_comment_length} && $self->{html_results}{non_uri_len} > 0) { + $self->{html_results}{total_comment_ratio} = $self->{html_results}{total_comment_length} / $self->{html_results}{non_uri_len}; + } } else { $self->{'rendered_type'} = $self->{'type'}; Modified: incubator/spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm ============================================================================== --- incubator/spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm (original) +++ incubator/spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm Sat Jan 24 19:17:01 2004 @@ -1001,42 +1001,14 @@ if ( defined $rnd ) { # Only text/* types are rendered ... $text .= $text ? "\n$rnd" : $rnd; + + # TVD - if there are multiple parts, what should we do? + $self->{html} = $p->{html_results} if ( $type eq 'text/html' ); } else { $text .= $text ? "\n".$p->decode() : $p->decode(); } } - -# # do HTML conversions if necessary -# if ($text =~ m/<(?:$Mail::SpamAssassin::HTML::re_strict|$Mail::SpamAssassin::HTML::re_loose|!--|!doctype)(?:\s|>)/ois) { -# my $raw = length($text); -# my $before = substr($text, 0, $-[0], ''); -# -# # render -# $self->{html_text} = $self->{html_mod}->html_render($text); -# $self->{html} = $self->{html_mod}->get_results(); -# -# $text = join('', $before, @{$self->{html_text}}); -# -# if ($raw > 0) { -# my $space = ($before =~ tr/ \t\n\r\x0b\xa0/ \t\n\r\x0b\xa0/); -# $self->{html}{non_uri_len} = length($before); -# for my $line (@{$self->{html_text}}) { -# $line = pack ('C0A*', $line); -# $space += ($line =~ tr/ \t\n\r\x0b\xa0/ \t\n\r\x0b\xa0/); -# $self->{html}{non_uri_len} += length($line); -# for my $uri ($line =~ m/\b(URI:\S+)/g) { -# $self->{html}{non_uri_len} -= length($uri); -# } -# } -# $self->{html}{non_space_len} = $self->{html}{non_uri_len} - $space; -# $self->{html}{ratio} = ($raw - $self->{html}{non_uri_len}) / $raw; -# if (exists $self->{html}{total_comment_length} && $self->{html}{non_uri_len} > 0) { -# $self->{html}{total_comment_ratio} = $self->{html}{total_comment_length} / $self->{html}{non_uri_len}; -# } -# } # if ($raw > 0) -# delete $self->{html_last_tag}; -# } # if HTML # whitespace handling (warning: small changes have large effects!) $text =~ s/\n+\s*\n+/\f/gs; # double newlines => form feed