spamassassin-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From felic...@apache.org
Subject svn commit: r386462 - in /spamassassin/rules/trunk/sandbox/felicity: 70_other.cf 70_phishing.cf sandbox-felicity.pm
Date Thu, 16 Mar 2006 21:37:35 GMT
Author: felicity
Date: Thu Mar 16 13:37:32 2006
New Revision: 386462

URL: http://svn.apache.org/viewcvs?rev=386462&view=rev
Log:
try out a number of different HTTPS_HTTP_MISMATCH anti-phishing rules -- found a version that
has a good hit rate!  thanks to Fred Tarasevicius for leading to the idea.

Modified:
    spamassassin/rules/trunk/sandbox/felicity/70_other.cf
    spamassassin/rules/trunk/sandbox/felicity/70_phishing.cf
    spamassassin/rules/trunk/sandbox/felicity/sandbox-felicity.pm

Modified: spamassassin/rules/trunk/sandbox/felicity/70_other.cf
URL: http://svn.apache.org/viewcvs/spamassassin/rules/trunk/sandbox/felicity/70_other.cf?rev=386462&r1=386461&r2=386462&view=diff
==============================================================================
--- spamassassin/rules/trunk/sandbox/felicity/70_other.cf (original)
+++ spamassassin/rules/trunk/sandbox/felicity/70_other.cf Thu Mar 16 13:37:32 2006
@@ -81,10 +81,6 @@
 # 0.062   0.0800   0.0000    1.000   0.58    0.01  T_TVD_RATWARE_CB
 header TVD_RATWARE_CB		Content-Type =~ /\bboundary\b.{1,40}qzsoft_directmail_seperator/i
 
-#loadplugin Mail::SpamAssassin::Plugin::Sandbox::felicity sandbox-felicity.pm
-#ifplugin Mail::SpamAssassin::Plugin::Sandbox::felicity
-#endif
-
 header TVD_SUBJ_WIPE_DEBT	Subject =~ /(?:wipe out|remove|get (?:rid|out) of|eradicate) .{0,20}(?:owe|debt|obligation)/i
 header TVD_SUBJ_APPR_LOAN	Subject =~ /approved? .{0,20}loan/i
 

Modified: spamassassin/rules/trunk/sandbox/felicity/70_phishing.cf
URL: http://svn.apache.org/viewcvs/spamassassin/rules/trunk/sandbox/felicity/70_phishing.cf?rev=386462&r1=386461&r2=386462&view=diff
==============================================================================
--- spamassassin/rules/trunk/sandbox/felicity/70_phishing.cf (original)
+++ spamassassin/rules/trunk/sandbox/felicity/70_phishing.cf Thu Mar 16 13:37:32 2006
@@ -82,3 +82,70 @@
 
 describe T_LOCAL_PP_UPD_BADURL paypal account update, but has bad URL
 describe T_LOCAL_PP_UPD_BADADDR paypal account update, but not from @paypal.com
+
+
+
+loadplugin Mail::SpamAssassin::Plugin::Sandbox::felicity sandbox-felicity.pm
+
+ifplugin Mail::SpamAssassin::Plugin::Sandbox::felicity
+
+# bug 4255: with some ideas from Fred Tarasevicius I came up with a rule that
+# performs pretty decently, worthy of a general mass-check:
+# 0.194   0.2293   0.0000    1.000   1.00    0.01  T_HTTPS_HTTP_MISMATCH_1_14
+# 0.194   0.2293   0.0000    1.000   1.00    0.01  T_HTTPS_HTTP_MISMATCH_1_10
+# 0.194   0.2293   0.0000    1.000   1.00    0.01  T_HTTPS_HTTP_MISMATCH_1_12
+# 0.194   0.2293   0.0000    1.000   1.00    0.01  T_HTTPS_HTTP_MISMATCH_1_11
+# 0.194   0.2293   0.0000    1.000   1.00    0.01  T_HTTPS_HTTP_MISMATCH_1_13
+# 0.197   0.2293   0.0189    0.924   0.57    0.01  T_HTTPS_HTTP_MISMATCH_1_15
+# generally, hams seem to have a lot of links, whereas phishing mails don't.
+# so compare the domains between https? href and https anchor text, and flag
+# if the number of anchors is inside the given range and the domains don't
+# match.
+# FYI: these rules don't overlap HTTPS_IP_MISMATCH as IPs are ignored in the
+# href -- IPs tend not to be used in ham, so don't bother with the overhead of
+# this rule.  That said, the two rules are very similar and could definitely
+# share code; if promoted, they should be merged so one function backs both.
+
+body  T_HTTPS_HTTP_MISMATCH_1 eval:check_https_http_mismatch('1')
+body  T_HTTPS_HTTP_MISMATCH_2 eval:check_https_http_mismatch('2')
+body  T_HTTPS_HTTP_MISMATCH_3 eval:check_https_http_mismatch('3')
+body  T_HTTPS_HTTP_MISMATCH_4 eval:check_https_http_mismatch('4')
+body  T_HTTPS_HTTP_MISMATCH_5 eval:check_https_http_mismatch('5')
+
+body  T_HTTPS_HTTP_MISMATCH_1_10 eval:check_https_http_mismatch('1','10')
+body  T_HTTPS_HTTP_MISMATCH_2_10 eval:check_https_http_mismatch('2','10')
+body  T_HTTPS_HTTP_MISMATCH_3_10 eval:check_https_http_mismatch('3','10')
+body  T_HTTPS_HTTP_MISMATCH_4_10 eval:check_https_http_mismatch('4','10')
+body  T_HTTPS_HTTP_MISMATCH_5_10 eval:check_https_http_mismatch('5','10')
+
+body  T_HTTPS_HTTP_MISMATCH_1_11 eval:check_https_http_mismatch('1','11')
+body  T_HTTPS_HTTP_MISMATCH_2_11 eval:check_https_http_mismatch('2','11')
+body  T_HTTPS_HTTP_MISMATCH_3_11 eval:check_https_http_mismatch('3','11')
+body  T_HTTPS_HTTP_MISMATCH_4_11 eval:check_https_http_mismatch('4','11')
+body  T_HTTPS_HTTP_MISMATCH_5_11 eval:check_https_http_mismatch('5','11')
+
+body  T_HTTPS_HTTP_MISMATCH_1_12 eval:check_https_http_mismatch('1','12')
+body  T_HTTPS_HTTP_MISMATCH_2_12 eval:check_https_http_mismatch('2','12')
+body  T_HTTPS_HTTP_MISMATCH_3_12 eval:check_https_http_mismatch('3','12')
+body  T_HTTPS_HTTP_MISMATCH_4_12 eval:check_https_http_mismatch('4','12')
+body  T_HTTPS_HTTP_MISMATCH_5_12 eval:check_https_http_mismatch('5','12')
+
+body  T_HTTPS_HTTP_MISMATCH_1_13 eval:check_https_http_mismatch('1','13')
+body  T_HTTPS_HTTP_MISMATCH_2_13 eval:check_https_http_mismatch('2','13')
+body  T_HTTPS_HTTP_MISMATCH_3_13 eval:check_https_http_mismatch('3','13')
+body  T_HTTPS_HTTP_MISMATCH_4_13 eval:check_https_http_mismatch('4','13')
+body  T_HTTPS_HTTP_MISMATCH_5_13 eval:check_https_http_mismatch('5','13')
+
+body  T_HTTPS_HTTP_MISMATCH_1_14 eval:check_https_http_mismatch('1','14')
+body  T_HTTPS_HTTP_MISMATCH_2_14 eval:check_https_http_mismatch('2','14')
+body  T_HTTPS_HTTP_MISMATCH_3_14 eval:check_https_http_mismatch('3','14')
+body  T_HTTPS_HTTP_MISMATCH_4_14 eval:check_https_http_mismatch('4','14')
+body  T_HTTPS_HTTP_MISMATCH_5_14 eval:check_https_http_mismatch('5','14')
+
+body  T_HTTPS_HTTP_MISMATCH_1_15 eval:check_https_http_mismatch('1','15')
+body  T_HTTPS_HTTP_MISMATCH_2_15 eval:check_https_http_mismatch('2','15')
+body  T_HTTPS_HTTP_MISMATCH_3_15 eval:check_https_http_mismatch('3','15')
+body  T_HTTPS_HTTP_MISMATCH_4_15 eval:check_https_http_mismatch('4','15')
+body  T_HTTPS_HTTP_MISMATCH_5_15 eval:check_https_http_mismatch('5','15')
+
+endif

Modified: spamassassin/rules/trunk/sandbox/felicity/sandbox-felicity.pm
URL: http://svn.apache.org/viewcvs/spamassassin/rules/trunk/sandbox/felicity/sandbox-felicity.pm?rev=386462&r1=386461&r2=386462&view=diff
==============================================================================
--- spamassassin/rules/trunk/sandbox/felicity/sandbox-felicity.pm (original)
+++ spamassassin/rules/trunk/sandbox/felicity/sandbox-felicity.pm Thu Mar 16 13:37:32 2006
@@ -17,6 +17,7 @@
 package Mail::SpamAssassin::Plugin::Sandbox::felicity;
 
 use Mail::SpamAssassin::Plugin;
+use Mail::SpamAssassin::Logger;
 use strict;
 use warnings;
 use bytes;
@@ -35,9 +36,76 @@
   bless ($self, $class);
 
   # the important bit!
-  #$self->register_eval_rule("function_name");
+  $self->register_eval_rule ("check_https_http_mismatch");
 
   return $self;
+}
+
+# Eval rule: hit when an anchor's visible https:// text names a different domain than its http(s) href, e.g. <a href="http://baboz-njeryz.de/">https://bankofamerica.com/</a>
+sub check_https_http_mismatch {
+  my ($self, $permsgstatus, undef, $minanchors, $maxanchors) = @_;  # third argument is discarded; $minanchors/$maxanchors bound the anchor count
+  my $uris = $permsgstatus->get_uri_detail_list();  # NOTE(review): $uris is never read below; presumably called for its side effects -- confirm
+
+  $minanchors ||= 1;  # missing/zero minimum becomes 1.  NOTE(review): $maxanchors has no default and the final return is false when it is undef -- confirm intent
+
+  if (!exists $permsgstatus->{chhm_hit}) {  # scan only once per $permsgstatus; later calls reuse chhm_hit / chhm_anchors
+
+  $permsgstatus->{chhm_hit} = 0;
+  $permsgstatus->{chhm_anchors} = 0;
+
+  while( my($k,$v) = each %{$permsgstatus->{html}->{uri_detail}} ) {
+    # Skip this URI if it has no anchor text at all (it was not used in an
+    # anchor tag, or the anchor text list is empty).
+    next unless (exists $v->{anchor_text} && @{$v->{anchor_text}});
+
+    my $uri;
+    foreach (@{$v->{cleaned}}) {
+      if (m@^https?://([^/:]+)@i) {
+	$uri = $1;
+
+	# Skip IP-address hosts, since there's another rule to catch those already.
+        if ($uri =~ m%^\d+\.\d+\.\d+\.\d+$%) {
+          undef $uri;
+          next;
+        }
+
+	# To compare whole hostnames instead, comment out the following "if" block
+	# (down to the blank line).  NOTE(review): its IP test is redundant; IPs were skipped above.
+        if ($uri !~ /^\d+\.\d+\.\d+\.\d+$/) {
+	  $uri = Mail::SpamAssassin::Util::RegistrarBoundaries::trim_domain($uri);
+          undef $uri unless (Mail::SpamAssassin::Util::RegistrarBoundaries::is_domain_valid($uri));
+        }
+
+	last if $uri;
+      }
+    }
+
+    next unless $uri;
+    $permsgstatus->{chhm_anchors}++ if exists $v->{anchor_text};  # counted once per URI entry, not once per anchor text
+
+    foreach (@{$v->{anchor_text}}) {
+      if (m@^https://([^/:]+)@i) {
+        my $https = $1;
+
+	# To compare whole hostnames instead, comment out the following "if" block
+	# (down to the "next unless" line).  Only anchor text starting with https:// gets here.
+        if ($https !~ /^\d+\.\d+\.\d+\.\d+$/) {
+	  $https = Mail::SpamAssassin::Util::RegistrarBoundaries::trim_domain($https);
+          undef $https unless (Mail::SpamAssassin::Util::RegistrarBoundaries::is_domain_valid($https));
+        }
+	next unless $https;
+
+	dbg("https_http_mismatch: domains $uri -> $https");
+	next if $uri eq $https;
+	$permsgstatus->{chhm_hit} = 1;
+	last;  # leaves only this URI's anchor-text loop; the outer loop keeps counting anchors
+      }
+    }
+  }
+  dbg("https_http_mismatch: anchors ".$permsgstatus->{chhm_anchors});
+  }
+
+  return ( (defined $maxanchors && $permsgstatus->{chhm_anchors} < $maxanchors)
&& $permsgstatus->{chhm_hit} && $permsgstatus->{chhm_anchors} >=
$minanchors);
 }
 
 1;



Mime
View raw message