tomcat-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ma...@apache.org
Subject svn commit: r1067759 - /tomcat/trunk/java/org/apache/catalina/valves/CrawlerSessionManagerValve.java
Date Sun, 06 Feb 2011 21:00:52 GMT
Author: markt
Date: Sun Feb  6 21:00:52 2011
New Revision: 1067759

URL: http://svn.apache.org/viewvc?rev=1067759&view=rev
Log:
Review from kkolinko
1. Matcher not thread safe
2. >1 UA header -> not a bot

Modified:
    tomcat/trunk/java/org/apache/catalina/valves/CrawlerSessionManagerValve.java

Modified: tomcat/trunk/java/org/apache/catalina/valves/CrawlerSessionManagerValve.java
URL: http://svn.apache.org/viewvc/tomcat/trunk/java/org/apache/catalina/valves/CrawlerSessionManagerValve.java?rev=1067759&r1=1067758&r2=1067759&view=diff
==============================================================================
--- tomcat/trunk/java/org/apache/catalina/valves/CrawlerSessionManagerValve.java (original)
+++ tomcat/trunk/java/org/apache/catalina/valves/CrawlerSessionManagerValve.java Sun Feb  6 21:00:52 2011
@@ -22,7 +22,6 @@ import java.util.Iterator;
 import java.util.Map;
 import java.util.Map.Entry;
 import java.util.concurrent.ConcurrentHashMap;
-import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
 import javax.servlet.ServletException;
@@ -51,7 +50,7 @@ public class CrawlerSessionManagerValve 
 
     private String crawlerUserAgents =
         ".*GoogleBot.*|.*bingbot.*|.*Yahoo! Slurp.*";
-    private Matcher uaMatcher = null;
+    private Pattern uaPattern = null;
     private int sessionInactiveInterval = 60;
 
 
@@ -65,9 +64,9 @@ public class CrawlerSessionManagerValve 
     public void setCrawlerUserAgents(String crawlerUserAgents) {
         this.crawlerUserAgents = crawlerUserAgents;
         if (crawlerUserAgents == null || crawlerUserAgents.length() == 0) {
-            uaMatcher = null;
+            uaPattern = null;
         } else {
-            uaMatcher = Pattern.compile(crawlerUserAgents).matcher("");
+            uaPattern = Pattern.compile(crawlerUserAgents);
         }
     }
 
@@ -103,7 +102,7 @@ public class CrawlerSessionManagerValve 
     protected void initInternal() throws LifecycleException {
         super.initInternal();
         
-        uaMatcher = Pattern.compile(crawlerUserAgents).matcher("");
+        uaPattern = Pattern.compile(crawlerUserAgents);
     }
 
 
@@ -124,19 +123,18 @@ public class CrawlerSessionManagerValve 
         // If the incoming request has a session ID, no action is required
         if (request.getRequestedSessionId() == null) {
 
-            // Is this a crawler
+            // Is this a crawler - check the UA headers
             Enumeration<String> uaHeaders = request.getHeaders("user-agent");
-            while (!isBot && uaMatcher != null &&
-                    uaHeaders.hasMoreElements()) {
-                
-                String uaHeader = uaHeaders.nextElement();
-                uaMatcher.reset(uaHeader);
-                
+            String uaHeader = uaHeaders.nextElement();
+            
+            // If more than one UA header - assume not a bot
+            if (!uaHeaders.hasMoreElements()) {
+
                 if (log.isDebugEnabled()) {
                     log.debug(request.hashCode() + ": UserAgent=" + uaHeader);
                 }
                 
-                if (uaMatcher.matches()) {
+                if (uaPattern.matcher(uaHeader).matches()) {
                     isBot = true;
                     
                     if (log.isDebugEnabled()) {



---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@tomcat.apache.org
For additional commands, e-mail: dev-help@tomcat.apache.org


Mime
View raw message