lucene-java-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ka...@apache.org
Subject svn commit: r821311 - in /lucene/java/trunk/contrib/analyzers/common/src: java/org/apache/lucene/analysis/shingle/ShingleMatrixFilter.java test/org/apache/lucene/analysis/shingle/TestShingleMatrixFilter.java
Date Sat, 03 Oct 2009 13:17:11 GMT
Author: kalle
Date: Sat Oct  3 13:17:11 2009
New Revision: 821311

URL: http://svn.apache.org/viewvc?rev=821311&view=rev
Log:
LUCENE-1257: Generified ShingleMatrixFilter

Modified:
    lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/shingle/ShingleMatrixFilter.java
    lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/TestShingleMatrixFilter.java

Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/shingle/ShingleMatrixFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/shingle/ShingleMatrixFilter.java?rev=821311&r1=821310&r2=821311&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/shingle/ShingleMatrixFilter.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/shingle/ShingleMatrixFilter.java Sat Oct  3 13:17:11 2009
@@ -112,7 +112,7 @@
  * See {@link #calculateShingleWeight(org.apache.lucene.analysis.Token, java.util.List, int, java.util.List, java.util.List)}.
  * <p/>
  * <b>NOTE:</b> This filter might not behave correctly if used with custom Attributes, i.e. Attributes other than
- * the ones located in org.apache.lucene.analysis.tokenattributes. 
+ * the ones located in org.apache.lucene.analysis.tokenattributes.
  */
 public class ShingleMatrixFilter extends TokenStream {
 
@@ -206,7 +206,7 @@
   private TypeAttribute in_typeAtt;
   private FlagsAttribute in_flagsAtt;
 
-  
+
   /**
    * Creates a shingle filter based on a user defined matrix.
    *
@@ -237,7 +237,7 @@
 
     // set the input to be an empty token stream, we already have the data.
     this.input = new EmptyTokenStream();
-    
+
     in_termAtt = input.addAttribute(TermAttribute.class);
     in_posIncrAtt = input.addAttribute(PositionIncrementAttribute.class);
     in_payloadAtt = input.addAttribute(PayloadAttribute.class);
@@ -316,7 +316,7 @@
     offsetAtt = addAttribute(OffsetAttribute.class);
     typeAtt = addAttribute(TypeAttribute.class);
     flagsAtt = addAttribute(FlagsAttribute.class);
-    
+
     in_termAtt = input.addAttribute(TermAttribute.class);
     in_posIncrAtt = input.addAttribute(PositionIncrementAttribute.class);
     in_payloadAtt = input.addAttribute(PayloadAttribute.class);
@@ -328,12 +328,12 @@
   // internal filter instance variables
 
   /** iterator over the current matrix row permutations */
-  private Iterator permutations;
+  private Iterator<Matrix.Column.Row[]> permutations;
 
   /** the current permutation of tokens used to produce shingles */
-  private List currentPermuationTokens;
+  private List<Token> currentPermuationTokens;
   /** index to what row a token in currentShingleTokens represents*/
-  private List currentPermutationRows;
+  private List<Matrix.Column.Row> currentPermutationRows;
 
   private int currentPermutationTokensStartOffset;
   private int currentShingleLength;
@@ -342,7 +342,7 @@
    * a set containing shingles that has been the result of a call to next(Token),
    * used to avoid producing the same shingle more than once.
    */
-  private Set shinglesSeen = new HashSet();
+  private Set<List<Token>> shinglesSeen = new HashSet<List<Token>>();
 
 
   public void reset() throws IOException {
@@ -352,9 +352,9 @@
   }
 
   private Matrix matrix;
-  
+
   private Token reusableToken = new Token();
-  
+
   public final boolean incrementToken() throws IOException {
     if (matrix == null) {
       matrix = new Matrix();
@@ -372,7 +372,7 @@
       token = produceNextToken(reusableToken);
     } while (token == request_next_token);
     if (token == null) return false;
-    
+
     termAtt.setTermBuffer(token.termBuffer(), 0, token.termLength());
     posIncrAtt.setPositionIncrement(token.getPositionIncrement());
     flagsAtt.setFlags(token.getFlags());
@@ -381,7 +381,7 @@
     payloadAtt.setPayload(token.getPayload());
     return true;
   }
-  
+
   private Token getNextInputToken(Token token) throws IOException {
     if (!input.incrementToken()) return null;
     token.setTermBuffer(in_termAtt.termBuffer(), 0, in_termAtt.termLength());
@@ -404,7 +404,7 @@
   public final Token next() throws java.io.IOException {
     return super.next();
   }
-  
+
   private static final Token request_next_token = new Token();
 
   /**
@@ -428,16 +428,16 @@
 
         if (ignoringSinglePrefixOrSuffixShingle
             && currentShingleLength == 1
-            && (((Matrix.Column.Row) currentPermutationRows.get(currentPermutationTokensStartOffset)).getColumn().isFirst() || ((Matrix.Column.Row) currentPermutationRows.get(currentPermutationTokensStartOffset)).getColumn().isLast())) {
+            && ((currentPermutationRows.get(currentPermutationTokensStartOffset)).getColumn().isFirst() || (currentPermutationRows.get(currentPermutationTokensStartOffset)).getColumn().isLast())) {
           return next(reusableToken);
         }
 
         int termLength = 0;
 
-        List shingle = new ArrayList();
+        List<Token> shingle = new ArrayList<Token>(currentShingleLength);
 
         for (int i = 0; i < currentShingleLength; i++) {
-          Token shingleToken = (Token) currentPermuationTokens.get(i + currentPermutationTokensStartOffset);
+          Token shingleToken = currentPermuationTokens.get(i + currentPermutationTokensStartOffset);
           termLength += shingleToken.termLength();
           shingle.add(shingleToken);
         }
@@ -452,8 +452,7 @@
 
         // shingle token factory
         StringBuilder sb = new StringBuilder(termLength + 10); // paranormal ability to foresee the future.
-        for (Iterator iterator = shingle.iterator(); iterator.hasNext();) {
-          Token shingleToken = (Token) iterator.next();
+        for (Token shingleToken : shingle) {
           if (spacerCharacter != null && sb.length() > 0) {
             sb.append(spacerCharacter);
           }
@@ -493,22 +492,19 @@
           // get rid of resources
 
           // delete the first column in the matrix
-          Matrix.Column deletedColumn = (Matrix.Column) matrix.columns.remove(0);
+          Matrix.Column deletedColumn = matrix.columns.remove(0);
 
           // remove all shingles seen that include any of the tokens from the deleted column.
-          List deletedColumnTokens = new ArrayList();
-          for (Iterator iterator = deletedColumn.getRows().iterator(); iterator.hasNext();) {
-            Matrix.Column.Row row = (Matrix.Column.Row) iterator.next();
-            for (Iterator rowIter = row.getTokens().iterator(); rowIter.hasNext();) {
-              Object o = rowIter.next();//Token
-              deletedColumnTokens.add(o);
+          List<Token> deletedColumnTokens = new ArrayList<Token>();
+          for (Matrix.Column.Row row : deletedColumn.getRows()) {
+            for (Token token : row.getTokens()) {
+              deletedColumnTokens.add(token);
             }
 
           }
-          for (Iterator shinglesSeenIterator = shinglesSeen.iterator(); shinglesSeenIterator.hasNext();) {
-            List shingle = (List) shinglesSeenIterator.next();
-            for (Iterator deletedIter = deletedColumnTokens.iterator(); deletedIter.hasNext();) {
-              Token deletedColumnToken = (Token) deletedIter.next();
+          for (Iterator<List<Token>> shinglesSeenIterator = shinglesSeen.iterator(); shinglesSeenIterator.hasNext();) {
+            List<Token> shingle = shinglesSeenIterator.next();
+            for (Token deletedColumnToken : deletedColumnTokens) {
               if (shingle.contains(deletedColumnToken)) {
                 shinglesSeenIterator.remove();
                 break;
@@ -552,14 +548,12 @@
    * finally resets the current (next) shingle size and offset.
    */
   private void nextTokensPermutation() {
-    Matrix.Column.Row[] rowsPermutation;
-    rowsPermutation = (Matrix.Column.Row[]) permutations.next();
-    List currentPermutationRows = new ArrayList();
-    List currentPermuationTokens = new ArrayList();
-    for (int i = 0; i < rowsPermutation.length; i++) {
-      Matrix.Column.Row row = rowsPermutation[i];
-      for (Iterator iterator = row.getTokens().iterator(); iterator.hasNext();) {
-        currentPermuationTokens.add(iterator.next());
+    Matrix.Column.Row[] rowsPermutation = permutations.next();
+    List<Matrix.Column.Row> currentPermutationRows = new ArrayList<Matrix.Column.Row>();
+    List<Token> currentPermuationTokens = new ArrayList<Token>();
+    for (Matrix.Column.Row row : rowsPermutation) {
+      for (Token token : row.getTokens()) {
+        currentPermuationTokens.add(token);
         currentPermutationRows.add(row);
       }
     }
@@ -627,8 +621,7 @@
     double factor = 1d / Math.sqrt(total);
 
     double weight = 0d;
-    for (int i = 0; i < weights.length; i++) {
-      double partWeight = weights[i];
+    for (double partWeight : weights) {
       weight += partWeight * factor;
     }
 
@@ -709,7 +702,7 @@
 
     private boolean columnsHasBeenCreated = false;
 
-    private List columns = new ArrayList();
+    private List<Column> columns = new ArrayList<Column>();
 
     public List getColumns() {
       return columns;
@@ -740,9 +733,9 @@
         Matrix.this.columns.add(this);
       }
 
-      private List rows = new ArrayList();
+      private List<Row> rows = new ArrayList<Row>();
 
-      public List getRows() {
+      public List<Row> getRows() {
         return rows;
       }
 
@@ -781,7 +774,7 @@
           return Column.this;
         }
 
-        private List tokens = new LinkedList();
+        private List<Token> tokens = new LinkedList<Token>();
 
         public Row() {
           Column.this.rows.add(this);
@@ -791,11 +784,11 @@
           return Column.this.rows.indexOf(this);
         }
 
-        public List getTokens() {
+        public List<Token> getTokens() {
           return tokens;
         }
 
-        public void setTokens(List tokens) {
+        public void setTokens(List<Token> tokens) {
           this.tokens = tokens;
         }
 
@@ -826,9 +819,9 @@
     }
 
 
-    public Iterator permutationIterator() {
+    public Iterator<Column.Row[]> permutationIterator() {
 
-      return new Iterator() {
+      return new Iterator<Column.Row[]>() {
 
         private int[] columnRowCounters = new int[columns.size()];
 
@@ -838,10 +831,10 @@
 
         public boolean hasNext() {
           int s = columnRowCounters.length;
-          return s != 0 && columnRowCounters[s - 1] < ((Column) columns.get(s - 1)).getRows().size();
+          return s != 0 && columnRowCounters[s - 1] < (columns.get(s - 1)).getRows().size();
         }
 
-        public Object next() {
+        public Column.Row[] next() {
           if (!hasNext()) {
             throw new NoSuchElementException("no more elements");
           }
@@ -849,7 +842,7 @@
           Column.Row[] rows = new Column.Row[columnRowCounters.length];
 
           for (int i = 0; i < columnRowCounters.length; i++) {
-            rows[i] = (Matrix.Column.Row) ((Column) columns.get(i)).rows.get(columnRowCounters[i]);
+            rows[i] = columns.get(i).rows.get(columnRowCounters[i]);
           }
           incrementColumnRowCounters();
 
@@ -859,7 +852,7 @@
         private void incrementColumnRowCounters() {
           for (int i = 0; i < columnRowCounters.length; i++) {
             columnRowCounters[i]++;
-            if (columnRowCounters[i] == ((Column) columns.get(i)).rows.size() &&
+            if (columnRowCounters[i] == columns.get(i).rows.size() &&
                 i < columnRowCounters.length - 1) {
               columnRowCounters[i] = 0;
             } else {

Modified: lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/TestShingleMatrixFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/TestShingleMatrixFilter.java?rev=821311&r1=821310&r2=821311&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/TestShingleMatrixFilter.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/TestShingleMatrixFilter.java Sat Oct  3 13:17:11 2009
@@ -18,6 +18,7 @@
  */
 
 import java.io.IOException;
+import java.io.StringReader;
 import java.util.Collection;
 import java.util.Iterator;
 import java.util.LinkedList;
@@ -28,6 +29,7 @@
 import org.apache.lucene.analysis.CachingTokenFilter;
 import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.analysis.miscellaneous.EmptyTokenStream;
 import org.apache.lucene.analysis.miscellaneous.PrefixAndSuffixAwareTokenFilter;
 import org.apache.lucene.analysis.miscellaneous.SingleTokenTokenStream;
@@ -44,7 +46,7 @@
       "testBehavingAsShingleFilter", "testMatrix"
     })));
   }
-
+    
   public void testBehavingAsShingleFilter() throws IOException {
 
     ShingleMatrixFilter.defaultSettingsCodec = null;



Mime
View raw message