creadur-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From s...@apache.org
Subject svn commit: r1484209 - /creadur/rat/trunk/apache-rat-core/src/main/java/org/apache/rat/analysis/license/FullTextMatchingLicense.java
Date Sun, 19 May 2013 02:24:19 GMT
Author: sebb
Date: Sun May 19 02:24:19 2013
New Revision: 1484209

URL: http://svn.apache.org/r1484209
Log:
RAT-138 RAT runs very slowly on some input
Ensure buffer only grows sufficiently large to allow a match
Speeds up processing considerably

Modified:
    creadur/rat/trunk/apache-rat-core/src/main/java/org/apache/rat/analysis/license/FullTextMatchingLicense.java

Modified: creadur/rat/trunk/apache-rat-core/src/main/java/org/apache/rat/analysis/license/FullTextMatchingLicense.java
URL: http://svn.apache.org/viewvc/creadur/rat/trunk/apache-rat-core/src/main/java/org/apache/rat/analysis/license/FullTextMatchingLicense.java?rev=1484209&r1=1484208&r2=1484209&view=diff
==============================================================================
--- creadur/rat/trunk/apache-rat-core/src/main/java/org/apache/rat/analysis/license/FullTextMatchingLicense.java (original)
+++ creadur/rat/trunk/apache-rat-core/src/main/java/org/apache/rat/analysis/license/FullTextMatchingLicense.java Sun May 19 02:24:19 2013
@@ -38,7 +38,14 @@ import org.apache.rat.api.MetaData.Datum
 public class FullTextMatchingLicense extends BaseLicense
     implements IHeaderMatcher {
 
+    // Number of match characters assumed to be present on first line
+    private static final int DEFAULT_INITIAL_LINE_LENGTH = 20;
+
     private String fullText;
+    
+    private String firstLine;
+
+    private boolean seenFirstLine = false;
 
     private final StringBuilder buffer = new StringBuilder();
 
@@ -54,25 +61,60 @@ public class FullTextMatchingLicense ext
     }
 
     public final void setFullText(String text) {
+        int offset = text.indexOf('\n');
+        if (offset == -1) {
+            offset = Math.min(DEFAULT_INITIAL_LINE_LENGTH, text.length());
+        }
+        firstLine = prune(text.substring(0, offset)).toLowerCase(Locale.ENGLISH);
         fullText = prune(text).toLowerCase(Locale.ENGLISH);
+        init();
     }
 
     public final boolean hasFullText() {
         return fullText != null;
     }
 
-    // TODO this is still quite inefficient if the match does not occur near the start of the buffer
-    // see RAT-138
     public boolean match(Document subject, String line) throws RatHeaderAnalysisException {
-        buffer.append(prune(line).toLowerCase(Locale.ENGLISH));
-        if (buffer.toString().contains(fullText)) {
-            reportOnLicense(subject);
-            return true;
+        final String inputToMatch = prune(line).toLowerCase(Locale.ENGLISH);
+        if (seenFirstLine) { // Accumulate more input
+            buffer.append(inputToMatch);
+        } else {
+            int offset = inputToMatch.indexOf(firstLine);
+            if (offset >= 0) {
+                // we have a match, save the text starting with the match
+                buffer.append(inputToMatch.substring(offset));
+                seenFirstLine = true;
+                // Drop out to check whether full text is matched
+            } else {
+                // we assume that the first line must appear in a single line
+                return false; // no more to do here
+            }
+        }
+ 
+        if (buffer.length() >= fullText.length()) { // we have enough data to match
+            if (buffer.toString().contains(fullText)) {
+                reportOnLicense(subject);
+                return true; // we found a match
+            } else { // buffer contains first line but does not contain full text
+                // It's possible that the buffer contains the first line again
+                int offset = buffer.substring(1).toString().indexOf(firstLine);
+                if (offset >= 0) { // first line found again
+                    buffer.delete(0,offset); // reset buffer to the new start
+                } else { // buffer does not even contain first line, so cannot be used to match full text
+                    init();
+                }
+            }
         }
         return false;
     }
 
     public void reset() {
+        init();
+    }
+
+    // This is called indirectly from a ctor so must be final or private
+    private void init() {
         buffer.setLength(0);
+        seenFirstLine = false;
     }
 }
\ No newline at end of file



Mime
View raw message