spamassassin-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From j.@apache.org
Subject svn commit: r332722 - in /spamassassin/trunk: build/automc/populate_cor masses/corpora/mk-corpus-link-farm
Date Sat, 12 Nov 2005 03:23:01 GMT
Author: jm
Date: Fri Nov 11 19:22:58 2005
New Revision: 332722

URL: http://svn.apache.org/viewcvs?rev=332722&view=rev
Log:
fix --after bugs

Modified:
    spamassassin/trunk/build/automc/populate_cor
    spamassassin/trunk/masses/corpora/mk-corpus-link-farm

Modified: spamassassin/trunk/build/automc/populate_cor
URL: http://svn.apache.org/viewcvs/spamassassin/trunk/build/automc/populate_cor?rev=332722&r1=332721&r2=332722&view=diff
==============================================================================
--- spamassassin/trunk/build/automc/populate_cor (original)
+++ spamassassin/trunk/build/automc/populate_cor Fri Nov 11 19:22:58 2005
@@ -11,7 +11,9 @@
 date
 
 TMPDIR=/home/bbmass/tmpfs/tmp \
+  /local/perl586/bin/perl \
   $SADIR/masses/corpora/mk-corpus-link-farm \
+          -after="4 months ago" \
           -dest /home/bbmass/tmpfs/cor/mc-fast -num 1000 \
           -dest /home/bbmass/tmpfs/cor/mc-med -num 5000 \
           -dest /home/bbmass/tmpfs/cor/mc-slow -num 10000 \

Modified: spamassassin/trunk/masses/corpora/mk-corpus-link-farm
URL: http://svn.apache.org/viewcvs/spamassassin/trunk/masses/corpora/mk-corpus-link-farm?rev=332722&r1=332721&r2=332722&view=diff
==============================================================================
--- spamassassin/trunk/masses/corpora/mk-corpus-link-farm (original)
+++ spamassassin/trunk/masses/corpora/mk-corpus-link-farm Fri Nov 11 19:22:58 2005
@@ -43,6 +43,8 @@
 }
 
 use Time::ParseDate;
+use Time::Local;
+
 
 use Cwd;
 use File::Path;
@@ -52,7 +54,7 @@
 use SDBM_File;
 use Fcntl;
 
-my $DEBUG; #$DEBUG=1;
+my $DEBUG; $DEBUG=1;
 
 my @classes = qw(ham spam);
 my $srcs = [ ];
@@ -69,6 +71,7 @@
 );
 
 $opt_most_recent = 0;
+tz_init();
 
 my $curdest;
 GetOptions(
@@ -527,13 +530,16 @@
   my ($fromline) = @_;
 
   # From xscludshmkjgc@yahoo.com  Thu Apr 29 20:02:18 2004
+  return unless ($fromline =~ /^From \S+  (.*)$/);
+
+  $fromline = $1;
   $fromline .= " ".local_tz() unless $fromline =~ /(?:[-+]\d{4}|\b[A-Z]{2,4}\b)/;
   my $time = first_date($fromline);
-  return $time;
+  return message_is_useful_by_date($time);
 }
 
 sub message_is_useful_by_date {
-  my ($self, $date) = @_;
+  my ($date) = @_;
 
   return 0 unless $date;        # undef or 0 date = unusable
 
@@ -567,9 +573,14 @@
 
 ###########################################################################
 
+my %TZ;
+my %MONTH;
+
+sub tz_init {
+
 # timezone mappings: in case of conflicts, use RFC 2822, then most
 # common and least conflicting mapping
-my %TZ = (
+%TZ = (
 	# standard
 	'UT'   => '+0000',
 	'UTC'  => '+0000',
@@ -630,9 +641,11 @@
 	);
 
 # month mappings
-my %MONTH = (jan => 1, feb => 2, mar => 3, apr => 4, may => 5, jun => 6,
+%MONTH = (jan => 1, feb => 2, mar => 3, apr => 4, may => 5, jun => 6,
 	     jul => 7, aug => 8, sep => 9, oct => 10, nov => 11, dec => 12);
 
+}
+
 sub local_tz {
   # standard method for determining local timezone
   my $time = time;
@@ -713,7 +726,7 @@
   };
 
   if ($@) {
-    dbg("util: time cannot be parsed: $date, $yyyy-$mmm-$dd $hh:$mm:$ss");
+    dbg("util: time cannot be parsed: $date, $yyyy-$mmm-$dd $hh:$mm:$ss: $@");
     return undef;
   }
 



Mime
View raw message