spamassassin-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From j.@apache.org
Subject svn commit: r433599 - /spamassassin/branches/3.1/masses/mass-check
Date Tue, 22 Aug 2006 11:34:14 GMT
Author: jm
Date: Tue Aug 22 04:34:13 2006
New Revision: 433599

URL: http://svn.apache.org/viewvc?rev=433599&view=rev
Log:
more backporting of masses changes from trunk, for ruleqa

Modified:
    spamassassin/branches/3.1/masses/mass-check

Modified: spamassassin/branches/3.1/masses/mass-check
URL: http://svn.apache.org/viewvc/spamassassin/branches/3.1/masses/mass-check?rev=433599&r1=433598&r2=433599&view=diff
==============================================================================
--- spamassassin/branches/3.1/masses/mass-check (original)
+++ spamassassin/branches/3.1/masses/mass-check Tue Aug 22 04:34:13 2006
@@ -33,8 +33,9 @@
   --debug=LIST  report debugging information (default is all facilities, LIST
                 is a comma-separated list of facilities)
   --progress    show progress updates during check
-  --rewrite=OUT save rewritten message to OUT (default is /tmp/out)
+  --noisy       show noisier progress updates during check
   --showdots    print a dot for each scanned message
+  --rewrite=OUT save rewritten message to OUT (default is /tmp/out)
   --rules=RE    Only test rules matching the given regexp RE
   --restart=N   restart all of the children after processing N messages
   --deencap=RE  Extract SpamAssassin-encapsulated spam mails only if they
@@ -52,6 +53,8 @@
  
   message selection options
   -n            no date sorting or spam/ham interleaving
+  --cache	use cache information when selecting messages
+  --cachedir=dir write cache info for --cache in this directory tree
   --after=N     only test mails received after time_t N (negative values
                 are an offset from current time, e.g. -86400 = last day)
                 or after date as parsed by Time::ParseDate (e.g. '-6 months')
@@ -76,7 +79,8 @@
   non-option arguments are used as target names (mail files and folders),
   the target format is: <class>:<format>:<location>
   <class>       is "spam" or "ham"
-  <format>      is "dir", "file", "mbx", or "mbox"
+  <format>      is "dir", "file", "mbx", "mbox", or "detect"
+                (see 'perldoc Mail::SpamAssassin::ArchiveIterator)
   <location>    is a file or directory name.  globbing of ~ and * is supported
 
 EOF
@@ -90,8 +94,8 @@
 	    $opt_mid $opt_net $opt_nosort $opt_progress $opt_showdots
 	    $opt_spamlog $opt_tail $opt_rules $opt_restart $opt_loguris
 	    $opt_logmem $opt_after $opt_before $opt_rewrite $opt_deencap
-	    $opt_learn $opt_reuse $opt_lint
-	    $total_messages $statusevery
+	    $opt_learn $opt_reuse $opt_lint $opt_cache $opt_noisy
+	    $total_messages $statusevery $opt_cachedir
 	    %reuse %orig_conf %reuse_conf $reuse_rules_loaded_p);
 
 use FindBin;
@@ -102,10 +106,11 @@
 use Mail::SpamAssassin;
 use Mail::SpamAssassin::Logger;
 use File::Copy;
+use File::Spec;
 use Getopt::Long;
 use POSIX qw(strftime);
 use constant HAS_TIME_PARSEDATE => eval { require Time::ParseDate; };
-use Config;
+use Config; 
 
 # default settings
 $opt_c = "$FindBin::Bin/../rules";
@@ -117,11 +122,13 @@
 $opt_learn = 0;
 $reuse_rules_loaded_p = 0;
 
+my @ORIG_ARGV = @ARGV;
 GetOptions("c=s", "p=s", "f=s", "j=i", "n", "o", "all", "bayes", "debug:s",
 	   "hamlog=s", "head=i", "loghits", "mh", "mid", "ms", "net",
 	   "progress", "rewrite:s", "showdots", "spamlog=s", "tail=i",
 	   "rules=s", "restart=i", "after=s", "before=s", "loguris",
-	   "deencap=s", "logmem", "learn=i", "reuse", "lint",
+	   "deencap=s", "logmem", "learn=i", "reuse", "lint", "cache",
+           "cachedir=s", "noisy",
 	   "dir" => sub { $opt_format = "dir"; },
 	   "file" => sub { $opt_format = "file"; },
 	   "mbox" => sub { $opt_format = "mbox"; },
@@ -131,15 +138,22 @@
 
 # rules.pl is for the --reuse option, score set doesn't matter
 if ($opt_reuse && ! -f "$FindBin::Bin/tmp/rules.pl") {
-  system("cd $FindBin::Bin; perl parse-rules-for-masses -d $opt_c");
+  # some people specify paths relatively, whereas this needs an absolute path,
+  # so "do the right thing"(tm).
+  my $abs_opt_c = File::Spec->rel2abs($opt_c);
+  system("cd $FindBin::Bin; perl parse-rules-for-masses -d $abs_opt_c");
 }
 
 require "rules.pl" if $opt_reuse;
 
+if ($opt_noisy) {
+  $opt_progress = 1;        # implies --progress
+}
+
 # test messages for the mass-check
 my @targets;
 if ($opt_f) {
-  open(F, $opt_f) || die $!;
+  open(F, $opt_f) || die "cannot read target $opt_f: $!";
   push(@targets, map { chomp; $_ } <F>);
   close(F);
 }
@@ -226,23 +240,27 @@
 chomp $where;
 chomp $when;
 chomp $host;
-my $revision = "unknown";
-if (open(TESTING, "$opt_c/70_testing.cf")) {
-  chomp($revision = <TESTING>);
-  $revision =~ s/.*\$Rev:\s*(\S+).*/$1/;
-  close(TESTING);
-}
+my $revision = get_current_svn_revision();
+my $cmdline = join(' ',@ORIG_ARGV); $cmdline =~ s/\s+/ /gs;
+my $isowhen = strftime("%Y%m%dT%H%M%SZ", gmtime(time)); # better
+
 my $log_header = "# mass-check results from $who\@$where, on $when\n" .
 		 "# M:SA version ".$spamtest->Version()."\n" .
 		 "# SVN revision: $revision\n" .
-		 "# Perl version: $] on $Config{archname}\n";
+                 "# Date: $isowhen\n" .
+		 "# Perl version: $] on $Config{archname}\n" .
+                 "# Switches: '$cmdline'\n";
 
-my $updates = 10;
+my $updates = ($opt_noisy ? 100 : 10);
 my $total_count = 0;
 my $spam_count = 0;
 my $ham_count = 0;
 my $init_results = 0;
 
+my $showdots_active = ($opt_showdots || $opt_noisy);
+my $showdots_counter = 0;
+my $showdots_every = ($opt_showdots ? 1 : 20);
+
 # Deal with --rewrite
 if (defined $opt_rewrite) {
   my $rewrite = ($opt_rewrite ? $opt_rewrite : "/tmp/out");
@@ -274,15 +292,22 @@
 	'opt_all' => $opt_all,
 	'opt_head' => $opt_head,
 	'opt_tail' => $opt_tail,
+	'opt_cache' => $opt_cache,
+	'opt_cachedir' => $opt_cachedir,
 	'opt_after' => $opt_after,
 	'opt_before' => $opt_before,
 	'opt_restart' => $opt_restart,
+        'scan_progress_sub' => \&scan_progress_cb
 });
 
 if ($opt_progress) {
   status('starting scan stage');
 }
 
+sub scan_progress_cb {
+  showdots_blip();
+}
+
 $iter->set_functions(\&wanted, \&result);
 $iter->run(@targets);
 
@@ -290,12 +315,14 @@
   status('completed run stage');
 }
 
-print STDERR "\n" if ($opt_showdots);
+showdots_finish();
 
 if (defined $opt_rewrite) {
   close(REWRITE);
 }
 
+$spamtest->finish();
+
 # exit status: did we check at least one message correctly?
 exit(!($ham_count || $spam_count));
 
@@ -315,6 +342,11 @@
 ###########################################################################
 
 sub init_results {
+  showdots_finish();
+
+  # now, showdots only happens if --showdots was used
+  $showdots_active = $opt_showdots;
+
   if ($opt_progress) {
     # make it a local variable for now
     $total_messages = $Mail::SpamAssassin::ArchiveIterator::MESSAGES;
@@ -386,7 +418,9 @@
   my @previous;
   if ($x_spam_status) {
     $x_spam_status =~ s/,\s+/,/gs;
-    if ($x_spam_status =~ m/tests=(\S*)/) {
+    if ($x_spam_status =~ m/tests=(\S*)/
+        && $x_spam_status !~ /\bshortcircuit=(?:ham|spam|default)\b/)
+    {
       push @previous, split(/,/, $1);
 
       # we found previous tests, so move the reuse config into place
@@ -562,11 +596,25 @@
   undef $ma;		# clean 'em up
   undef $status;
 
-  if ($opt_showdots) {
+  showdots_blip();
+  return $out;
+}
+
+sub showdots_blip {
+  return unless ($showdots_active);
+
+  $showdots_counter++;
+  if ($showdots_counter % $showdots_every == 0) {
     print STDERR '.';
+    if ($showdots_counter % (60 * $showdots_every) == 0) {
+      print STDERR "\n";
+    }
   }
+}
 
-  return $out;
+sub showdots_finish {
+  print STDERR "\n" if ($showdots_active);
+  $showdots_counter = 0;
 }
 
 # ick.  We have to go grovelling through the body parts to see if a message
@@ -671,4 +719,42 @@
     }
   }
   $str;
+}
+
+sub get_current_svn_revision {
+  my $revision;
+
+  # this is usually "${TOPDIR}/masses"
+  my $dir = $FindBin::Bin || ".";
+
+  if (-d "$dir/.svn" || -f "$dir/svninfo.tmp") {
+    if (-f "$dir/svninfo.tmp") {
+      # created by build/automc/buildbot_ready for chrooted mass-checks
+      open (SVNINFO, "< $dir/svninfo.tmp");
+    } 
+    else {
+      # note, ".." since we want to pick up changes outside 'masses'
+      # too!
+      open (SVNINFO, "( svn info --non-interactive $dir/.. || svn info $dir/.. ) 2>&1 |");
+    }
+
+    while (<SVNINFO>) {
+      # Revision: 383822
+      next unless /^Revision: (\d+)/;
+      $revision = $1;
+      last;
+    }
+    close SVNINFO;
+    return $revision if $revision;
+  }
+
+  # this probably will never work due to Rules Project changes TODO
+  if (open(TESTING, "$opt_c/70_testing.cf")) {
+    chomp($revision = <TESTING>);
+    $revision =~ s/.*\$Rev:\s*(\S+).*/$1/;
+    close(TESTING);
+    return $revision if $revision;
+  }
+
+  return $revision || "unknown";
 }



Mime
View raw message