Return-Path: Delivered-To: apmail-spamassassin-commits-archive@www.apache.org Received: (qmail 99584 invoked from network); 16 Mar 2006 21:37:59 -0000 Received: from hermes.apache.org (HELO mail.apache.org) (209.237.227.199) by minotaur.apache.org with SMTP; 16 Mar 2006 21:37:59 -0000 Received: (qmail 11951 invoked by uid 500); 16 Mar 2006 21:37:58 -0000 Delivered-To: apmail-spamassassin-commits-archive@spamassassin.apache.org Received: (qmail 11929 invoked by uid 500); 16 Mar 2006 21:37:58 -0000 Mailing-List: contact commits-help@spamassassin.apache.org; run by ezmlm Precedence: bulk list-help: list-unsubscribe: List-Post: Reply-To: "SpamAssassin Dev" List-Id: Delivered-To: mailing list commits@spamassassin.apache.org Received: (qmail 11907 invoked by uid 99); 16 Mar 2006 21:37:58 -0000 Received: from asf.osuosl.org (HELO asf.osuosl.org) (140.211.166.49) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 16 Mar 2006 13:37:58 -0800 X-ASF-Spam-Status: No, hits=-9.4 required=10.0 tests=ALL_TRUSTED,NO_REAL_NAME X-Spam-Check-By: apache.org Received: from [209.237.227.194] (HELO minotaur.apache.org) (209.237.227.194) by apache.org (qpsmtpd/0.29) with SMTP; Thu, 16 Mar 2006 13:37:56 -0800 Received: (qmail 99442 invoked by uid 65534); 16 Mar 2006 21:37:35 -0000 Message-ID: <20060316213735.99441.qmail@minotaur.apache.org> Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r386462 - in /spamassassin/rules/trunk/sandbox/felicity: 70_other.cf 70_phishing.cf sandbox-felicity.pm Date: Thu, 16 Mar 2006 21:37:35 -0000 To: commits@spamassassin.apache.org From: felicity@apache.org X-Mailer: svnmailer-1.0.7 X-Virus-Checked: Checked by ClamAV on apache.org X-Spam-Rating: minotaur.apache.org 1.6.2 0/1000/N Author: felicity Date: Thu Mar 16 13:37:32 2006 New Revision: 386462 URL: http://svn.apache.org/viewcvs?rev=386462&view=rev Log: try out a number of different HTTPS_HTTP_MISMATCH anti-phishing rules -- found a version that has 
a good hit rate! thanks to Fred Tarasevicius for leading to the idea. Modified: spamassassin/rules/trunk/sandbox/felicity/70_other.cf spamassassin/rules/trunk/sandbox/felicity/70_phishing.cf spamassassin/rules/trunk/sandbox/felicity/sandbox-felicity.pm Modified: spamassassin/rules/trunk/sandbox/felicity/70_other.cf URL: http://svn.apache.org/viewcvs/spamassassin/rules/trunk/sandbox/felicity/70_other.cf?rev=386462&r1=386461&r2=386462&view=diff ============================================================================== --- spamassassin/rules/trunk/sandbox/felicity/70_other.cf (original) +++ spamassassin/rules/trunk/sandbox/felicity/70_other.cf Thu Mar 16 13:37:32 2006 @@ -81,10 +81,6 @@ # 0.062 0.0800 0.0000 1.000 0.58 0.01 T_TVD_RATWARE_CB header TVD_RATWARE_CB Content-Type =~ /\bboundary\b.{1,40}qzsoft_directmail_seperator/i -#loadplugin Mail::SpamAssassin::Plugin::Sandbox::felicity sandbox-felicity.pm -#ifplugin Mail::SpamAssassin::Plugin::Sandbox::felicity -#endif - header TVD_SUBJ_WIPE_DEBT Subject =~ /(?:wipe out|remove|get (?:rid|out) of|eradicate) .{0,20}(?:owe|debt|obligation)/i header TVD_SUBJ_APPR_LOAN Subject =~ /approved? 
.{0,20}loan/i Modified: spamassassin/rules/trunk/sandbox/felicity/70_phishing.cf URL: http://svn.apache.org/viewcvs/spamassassin/rules/trunk/sandbox/felicity/70_phishing.cf?rev=386462&r1=386461&r2=386462&view=diff ============================================================================== --- spamassassin/rules/trunk/sandbox/felicity/70_phishing.cf (original) +++ spamassassin/rules/trunk/sandbox/felicity/70_phishing.cf Thu Mar 16 13:37:32 2006 @@ -82,3 +82,70 @@ describe T_LOCAL_PP_UPD_BADURL paypal account update, but has bad URL describe T_LOCAL_PP_UPD_BADADDR paypal account update, but not from @paypal.com + + + +loadplugin Mail::SpamAssassin::Plugin::Sandbox::felicity sandbox-felicity.pm + +ifplugin Mail::SpamAssassin::Plugin::Sandbox::felicity + +# bug 4255: with some ideas from Fred Tarasevicius I came up with a rule that +# performs pretty decently, worthy of a general mass-check: +# 0.194 0.2293 0.0000 1.000 1.00 0.01 T_HTTPS_HTTP_MISMATCH_1_14 +# 0.194 0.2293 0.0000 1.000 1.00 0.01 T_HTTPS_HTTP_MISMATCH_1_10 +# 0.194 0.2293 0.0000 1.000 1.00 0.01 T_HTTPS_HTTP_MISMATCH_1_12 +# 0.194 0.2293 0.0000 1.000 1.00 0.01 T_HTTPS_HTTP_MISMATCH_1_11 +# 0.194 0.2293 0.0000 1.000 1.00 0.01 T_HTTPS_HTTP_MISMATCH_1_13 +# 0.197 0.2293 0.0189 0.924 0.57 0.01 T_HTTPS_HTTP_MISMATCH_1_15 +# generally, hams seem to have a lot of links, whereas phishing mails don't. +# so compare the domains between https? href and https anchor text, and flag +# if the number of anchors is inside the given range and the domains don't +# match. +# FYI: these rules don't overlap HTTPS_IP_MISMATCH as IPs are ignored in the +# href -- IPs tend not to be used in ham, so don't bother with the overhead of +# this rule. that said, the two rules are very similar and could definitely share +# code. if promoted, the two should be merged so that one implementation backs both rules. 
+ +body T_HTTPS_HTTP_MISMATCH_1 eval:check_https_http_mismatch('1') +body T_HTTPS_HTTP_MISMATCH_2 eval:check_https_http_mismatch('2') +body T_HTTPS_HTTP_MISMATCH_3 eval:check_https_http_mismatch('3') +body T_HTTPS_HTTP_MISMATCH_4 eval:check_https_http_mismatch('4') +body T_HTTPS_HTTP_MISMATCH_5 eval:check_https_http_mismatch('5') + +body T_HTTPS_HTTP_MISMATCH_1_10 eval:check_https_http_mismatch('1','10') +body T_HTTPS_HTTP_MISMATCH_2_10 eval:check_https_http_mismatch('2','10') +body T_HTTPS_HTTP_MISMATCH_3_10 eval:check_https_http_mismatch('3','10') +body T_HTTPS_HTTP_MISMATCH_4_10 eval:check_https_http_mismatch('4','10') +body T_HTTPS_HTTP_MISMATCH_5_10 eval:check_https_http_mismatch('5','10') + +body T_HTTPS_HTTP_MISMATCH_1_11 eval:check_https_http_mismatch('1','11') +body T_HTTPS_HTTP_MISMATCH_2_11 eval:check_https_http_mismatch('2','11') +body T_HTTPS_HTTP_MISMATCH_3_11 eval:check_https_http_mismatch('3','11') +body T_HTTPS_HTTP_MISMATCH_4_11 eval:check_https_http_mismatch('4','11') +body T_HTTPS_HTTP_MISMATCH_5_11 eval:check_https_http_mismatch('5','11') + +body T_HTTPS_HTTP_MISMATCH_1_12 eval:check_https_http_mismatch('1','12') +body T_HTTPS_HTTP_MISMATCH_2_12 eval:check_https_http_mismatch('2','12') +body T_HTTPS_HTTP_MISMATCH_3_12 eval:check_https_http_mismatch('3','12') +body T_HTTPS_HTTP_MISMATCH_4_12 eval:check_https_http_mismatch('4','12') +body T_HTTPS_HTTP_MISMATCH_5_12 eval:check_https_http_mismatch('5','12') + +body T_HTTPS_HTTP_MISMATCH_1_13 eval:check_https_http_mismatch('1','13') +body T_HTTPS_HTTP_MISMATCH_2_13 eval:check_https_http_mismatch('2','13') +body T_HTTPS_HTTP_MISMATCH_3_13 eval:check_https_http_mismatch('3','13') +body T_HTTPS_HTTP_MISMATCH_4_13 eval:check_https_http_mismatch('4','13') +body T_HTTPS_HTTP_MISMATCH_5_13 eval:check_https_http_mismatch('5','13') + +body T_HTTPS_HTTP_MISMATCH_1_14 eval:check_https_http_mismatch('1','14') +body T_HTTPS_HTTP_MISMATCH_2_14 eval:check_https_http_mismatch('2','14') +body 
T_HTTPS_HTTP_MISMATCH_3_14 eval:check_https_http_mismatch('3','14') +body T_HTTPS_HTTP_MISMATCH_4_14 eval:check_https_http_mismatch('4','14') +body T_HTTPS_HTTP_MISMATCH_5_14 eval:check_https_http_mismatch('5','14') + +body T_HTTPS_HTTP_MISMATCH_1_15 eval:check_https_http_mismatch('1','15') +body T_HTTPS_HTTP_MISMATCH_2_15 eval:check_https_http_mismatch('2','15') +body T_HTTPS_HTTP_MISMATCH_3_15 eval:check_https_http_mismatch('3','15') +body T_HTTPS_HTTP_MISMATCH_4_15 eval:check_https_http_mismatch('4','15') +body T_HTTPS_HTTP_MISMATCH_5_15 eval:check_https_http_mismatch('5','15') + +endif Modified: spamassassin/rules/trunk/sandbox/felicity/sandbox-felicity.pm URL: http://svn.apache.org/viewcvs/spamassassin/rules/trunk/sandbox/felicity/sandbox-felicity.pm?rev=386462&r1=386461&r2=386462&view=diff ============================================================================== --- spamassassin/rules/trunk/sandbox/felicity/sandbox-felicity.pm (original) +++ spamassassin/rules/trunk/sandbox/felicity/sandbox-felicity.pm Thu Mar 16 13:37:32 2006 @@ -17,6 +17,7 @@ package Mail::SpamAssassin::Plugin::Sandbox::felicity; use Mail::SpamAssassin::Plugin; +use Mail::SpamAssassin::Logger; use strict; use warnings; use bytes; @@ -35,9 +36,76 @@ bless ($self, $class); # the important bit! - #$self->register_eval_rule("function_name"); + $self->register_eval_rule ("check_https_http_mismatch"); return $self; +} + +# https://bankofamerica.com/ +sub check_https_http_mismatch { + my ($self, $permsgstatus, undef, $minanchors, $maxanchors) = @_; + my $uris = $permsgstatus->get_uri_detail_list(); + + $minanchors ||= 1; + + if (!exists $permsgstatus->{chhm_hit}) { + + $permsgstatus->{chhm_hit} = 0; + $permsgstatus->{chhm_anchors} = 0; + + while( my($k,$v) = each %{$permsgstatus->{html}->{uri_detail}} ) { + # if the URI wasn't used for an anchor tag, or the anchor text didn't + # exist, skip this. 
+ next unless (exists $v->{anchor_text} && @{$v->{anchor_text}}); + + my $uri; + foreach (@{$v->{cleaned}}) { + if (m@^https?://([^/:]+)@i) { + $uri = $1; + + # Skip IPs since there's another rule to catch that already + if ($uri =~ m%^\d+\.\d+\.\d+\.\d+$%) { + undef $uri; + next; + } + + # want to compare whole hostnames? comment out the following if-block, + # up to the blank line. + if ($uri !~ /^\d+\.\d+\.\d+\.\d+$/) { + $uri = Mail::SpamAssassin::Util::RegistrarBoundaries::trim_domain($uri); + undef $uri unless (Mail::SpamAssassin::Util::RegistrarBoundaries::is_domain_valid($uri)); + } + + last if $uri; + } + } + + next unless $uri; + $permsgstatus->{chhm_anchors}++ if exists $v->{anchor_text}; + + foreach (@{$v->{anchor_text}}) { + if (m@^https://([^/:]+)@i) { + my $https = $1; + + # want to compare whole hostnames? comment out the following if-block, + # up to the blank line. + if ($https !~ /^\d+\.\d+\.\d+\.\d+$/) { + $https = Mail::SpamAssassin::Util::RegistrarBoundaries::trim_domain($https); + undef $https unless (Mail::SpamAssassin::Util::RegistrarBoundaries::is_domain_valid($https)); + } + next unless $https; + + dbg("https_http_mismatch: domains $uri -> $https"); + next if $uri eq $https; + $permsgstatus->{chhm_hit} = 1; + last; + } + } + } + dbg("https_http_mismatch: anchors ".$permsgstatus->{chhm_anchors}); + } + + return ( (defined $maxanchors && $permsgstatus->{chhm_anchors} < $maxanchors) && $permsgstatus->{chhm_hit} && $permsgstatus->{chhm_anchors} >= $minanchors); } 1;