lucene-java-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From gsing...@apache.org
Subject svn commit: r604870 - in /lucene/java/trunk/contrib/analyzers/src: java/org/apache/lucene/analysis/payloads/ test/org/apache/lucene/analysis/payloads/
Date Mon, 17 Dec 2007 13:55:47 GMT
Author: gsingers
Date: Mon Dec 17 05:55:46 2007
New Revision: 604870

URL: http://svn.apache.org/viewvc?rev=604870&view=rev
Log:
LUCENE-1077:  refactored to use a common PayloadHelper class.  Also added TokenOffsetPayloadTokenFilter,
which encodes the Token offset into the payloads

Added:
    lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/PayloadHelper.java
    lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/TokenOffsetPayloadTokenFilter.java
      - copied, changed from r603640, lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilter.java
    lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/payloads/TokenOffsetPayloadTokenFilterTest.java
      - copied, changed from r603640, lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilterTest.java
Modified:
    lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilter.java
    lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilterTest.java

Modified: lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilter.java?rev=604870&r1=604869&r2=604870&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilter.java
(original)
+++ lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilter.java
Mon Dec 17 05:55:46 2007
@@ -17,9 +17,9 @@
  */
 
 
+import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.Token;
 import org.apache.lucene.index.Payload;
 
 import java.io.IOException;
@@ -37,42 +37,8 @@
   public NumericPayloadTokenFilter(TokenStream input, float payload, String typeMatch) {
     super(input);
     //Need to encode the payload
-    thePayload = new Payload(encodePayload(payload));
+    thePayload = new Payload(PayloadHelper.encodeFloat(payload));
     this.typeMatch = typeMatch;
-  }
-
-  public static byte[] encodePayload(float payload) {
-    byte[] result = new byte[4];
-    int tmp = Float.floatToIntBits(payload);
-    result[0] = (byte)(tmp >> 24);
-    result[1] = (byte)(tmp >> 16);
-    result[2] = (byte)(tmp >>  8);
-    result[3] = (byte) tmp;
-
-    return result;
-  }
-
-  /**
-   * @see #decodePayload(byte[], int)
-   * @see #encodePayload(float)
-   */
-  public static float decodePayload(byte [] bytes){
-    return decodePayload(bytes, 0);
-  }
-
-  /**
-   * Decode the payload that was encoded using {@link #encodePayload(float)}.
-   * NOTE: the length of the array must be at least offset + 4 long.
-   * @param bytes The bytes to decode
-   * @param offset The offset into the array.
-   * @return The float that was encoded
-   *
-   * @see #encodePayload(float) 
-   */
-  public static final float decodePayload(byte [] bytes, int offset){
-    int tmp = ((bytes[offset] & 0xFF) << 24) | ((bytes[offset + 1] & 0xFF) << 16)
-         | ((bytes[offset + 2] & 0xFF) <<  8) |  (bytes[offset + 3] & 0xFF);
-    return Float.intBitsToFloat(tmp);
   }
 
   public Token next(Token result) throws IOException {

Added: lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/PayloadHelper.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/PayloadHelper.java?rev=604870&view=auto
==============================================================================
--- lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/PayloadHelper.java
(added)
+++ lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/PayloadHelper.java
Mon Dec 17 05:55:46 2007
@@ -0,0 +1,70 @@
+package org.apache.lucene.analysis.payloads;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+/**
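+ * Utility methods for encoding int and float values into byte arrays,
+ * and decoding them back, for use as token payloads.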
+ **/
+public class PayloadHelper {
+
+  public static byte[] encodeFloat(float payload) {
+    return encodeFloat(payload, new byte[4], 0);
+  }
+
+  public static byte[] encodeFloat(float payload, byte[] data, int offset){
+    return encodeInt(Float.floatToIntBits(payload), data, offset);
+  }
+
+  public static byte[] encodeInt(int payload, byte[] data, int offset){
+    data[offset] = (byte)(payload >> 24);
+    data[offset + 1] = (byte)(payload >> 16);
+    data[offset + 2] = (byte)(payload >>  8);
+    data[offset + 3] = (byte) payload;
+    return data;
+  }
+
+  /**
+   * @param bytes The bytes to decode, starting at offset 0
+   * @see #decodeFloat(byte[], int)
+   * @see #encodeFloat(float)
+   * @return the decoded float
+   */
+  public static float decodeFloat(byte [] bytes){
+    return decodeFloat(bytes, 0);
+  }
+
+  /**
+   * Decode the payload that was encoded using {@link #encodeFloat(float)}.
+   * NOTE: the length of the array must be at least offset + 4 long.
+   * @param bytes The bytes to decode
+   * @param offset The offset into the array.
+   * @return The float that was encoded
+   *
+   * @see #encodeFloat(float)
+   */
+  public static final float decodeFloat(byte [] bytes, int offset){
+
+    return Float.intBitsToFloat(decodeInt(bytes, offset));
+  }
+
+  public static final int decodeInt(byte [] bytes, int offset){
+    return ((bytes[offset] & 0xFF) << 24) | ((bytes[offset + 1] & 0xFF) << 16)
+         | ((bytes[offset + 2] & 0xFF) <<  8) |  (bytes[offset + 3] & 0xFF);
+  }
+}

Copied: lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/TokenOffsetPayloadTokenFilter.java
(from r603640, lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilter.java)
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/TokenOffsetPayloadTokenFilter.java?p2=lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/TokenOffsetPayloadTokenFilter.java&p1=lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilter.java&r1=603640&r2=604870&rev=604870&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilter.java
(original)
+++ lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/TokenOffsetPayloadTokenFilter.java
Mon Dec 17 05:55:46 2007
@@ -17,69 +17,36 @@
  */
 
 
+import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.Token;
 import org.apache.lucene.index.Payload;
 
 import java.io.IOException;
 
 
 /**
- * Assigns a payload to a token based on the {@link org.apache.lucene.analysis.Token#type()}
+ * Adds the {@link org.apache.lucene.analysis.Token#setStartOffset(int)}
+ * and {@link org.apache.lucene.analysis.Token#setEndOffset(int)} values to each token as its payload.
+ * The first 4 bytes are the start offset and the last 4 bytes are the end offset.
  *
  **/
-public class NumericPayloadTokenFilter extends TokenFilter {
+public class TokenOffsetPayloadTokenFilter extends TokenFilter {
 
-  private String typeMatch;
-  private Payload thePayload;
 
-  public NumericPayloadTokenFilter(TokenStream input, float payload, String typeMatch) {
+  public TokenOffsetPayloadTokenFilter(TokenStream input) {
     super(input);
-    //Need to encode the payload
-    thePayload = new Payload(encodePayload(payload));
-    this.typeMatch = typeMatch;
-  }
-
-  public static byte[] encodePayload(float payload) {
-    byte[] result = new byte[4];
-    int tmp = Float.floatToIntBits(payload);
-    result[0] = (byte)(tmp >> 24);
-    result[1] = (byte)(tmp >> 16);
-    result[2] = (byte)(tmp >>  8);
-    result[3] = (byte) tmp;
-
-    return result;
-  }
-
-  /**
-   * @see #decodePayload(byte[], int)
-   * @see #encodePayload(float)
-   */
-  public static float decodePayload(byte [] bytes){
-    return decodePayload(bytes, 0);
-  }
-
-  /**
-   * Decode the payload that was encoded using {@link #encodePayload(float)}.
-   * NOTE: the length of the array must be at least offset + 4 long.
-   * @param bytes The bytes to decode
-   * @param offset The offset into the array.
-   * @return The float that was encoded
-   *
-   * @see #encodePayload(float) 
-   */
-  public static final float decodePayload(byte [] bytes, int offset){
-    int tmp = ((bytes[offset] & 0xFF) << 24) | ((bytes[offset + 1] & 0xFF) << 16)
-         | ((bytes[offset + 2] & 0xFF) <<  8) |  (bytes[offset + 3] & 0xFF);
-    return Float.intBitsToFloat(tmp);
   }
 
   public Token next(Token result) throws IOException {
     result = input.next(result);
-    if (result != null && result.type().equals(typeMatch)){
-      result.setPayload(thePayload);
+    if (result != null){
+      byte[] data = new byte[8];
+      PayloadHelper.encodeInt(result.startOffset(), data, 0);
+      PayloadHelper.encodeInt(result.endOffset(), data, 4);
+      Payload payload = new Payload(data);
+      result.setPayload(payload);
     }
     return result;
   }
-}
+}
\ No newline at end of file

Modified: lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilterTest.java?rev=604870&r1=604869&r2=604870&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilterTest.java
(original)
+++ lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilterTest.java
Mon Dec 17 05:55:46 2007
@@ -17,9 +17,9 @@
  */
 
 import junit.framework.TestCase;
+import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.WhitespaceTokenizer;
 
 import java.io.IOException;
@@ -53,7 +53,7 @@
         byte [] bytes = tok.getPayload().getData();//safe here to just use the bytes, otherwise we should use offset, length
         assertTrue(bytes.length + " does not equal: " + tok.getPayload().length(), bytes.length == tok.getPayload().length());
         assertTrue(tok.getPayload().getOffset() + " does not equal: " + 0, tok.getPayload().getOffset() == 0);
-        float pay = NumericPayloadTokenFilter.decodePayload(bytes);
+        float pay = PayloadHelper.decodeFloat(bytes);
         assertTrue(pay + " does not equal: " + 3, pay == 3);
       } else {
         assertTrue(tok.type() + " is not null and it should be", tok.type().equals("word"));

Copied: lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/payloads/TokenOffsetPayloadTokenFilterTest.java
(from r603640, lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilterTest.java)
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/payloads/TokenOffsetPayloadTokenFilterTest.java?p2=lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/payloads/TokenOffsetPayloadTokenFilterTest.java&p1=lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilterTest.java&r1=603640&r2=604870&rev=604870&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilterTest.java
(original)
+++ lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/payloads/TokenOffsetPayloadTokenFilterTest.java
Mon Dec 17 05:55:46 2007
@@ -17,18 +17,17 @@
  */
 
 import junit.framework.TestCase;
-import org.apache.lucene.analysis.TokenFilter;
-import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.WhitespaceTokenizer;
+import org.apache.lucene.index.Payload;
 
 import java.io.IOException;
 import java.io.StringReader;
 
-public class NumericPayloadTokenFilterTest extends TestCase {
+public class TokenOffsetPayloadTokenFilterTest extends TestCase {
 
 
-  public NumericPayloadTokenFilterTest(String s) {
+  public TokenOffsetPayloadTokenFilterTest(String s) {
     super(s);
   }
 
@@ -42,38 +41,23 @@
   public void test() throws IOException {
     String test = "The quick red fox jumped over the lazy brown dogs";
 
-    NumericPayloadTokenFilter nptf = new NumericPayloadTokenFilter(new WordTokenFilter(new WhitespaceTokenizer(new StringReader(test))), 3, "D");
+    TokenOffsetPayloadTokenFilter nptf = new TokenOffsetPayloadTokenFilter(new WhitespaceTokenizer(new StringReader(test)));
     Token tok = new Token();
-    boolean seenDogs = false;
+    int count = 0;
     while ((tok = nptf.next(tok)) != null){
-      if (tok.termText().equals("dogs")){
-        seenDogs = true;
-        assertTrue(tok.type() + " is not equal to " + "D", tok.type().equals("D") == true);
-        assertTrue("tok.getPayload() is null and it shouldn't be", tok.getPayload() != null);
-        byte [] bytes = tok.getPayload().getData();//safe here to just use the bytes, otherwise we should use offset, length
-        assertTrue(bytes.length + " does not equal: " + tok.getPayload().length(), bytes.length == tok.getPayload().length());
-        assertTrue(tok.getPayload().getOffset() + " does not equal: " + 0, tok.getPayload().getOffset() == 0);
-        float pay = NumericPayloadTokenFilter.decodePayload(bytes);
-        assertTrue(pay + " does not equal: " + 3, pay == 3);
-      } else {
-        assertTrue(tok.type() + " is not null and it should be", tok.type().equals("word"));
-      }
-    }
-    assertTrue(seenDogs + " does not equal: " + true, seenDogs == true);
-  }
-
-  private class WordTokenFilter extends TokenFilter {
-    private WordTokenFilter(TokenStream input) {
-      super(input);
+      assertTrue("tok is null and it shouldn't be", tok != null);
+      Payload pay = tok.getPayload();
+      assertTrue("pay is null and it shouldn't be", pay != null);
+      byte [] data = pay.getData();
+      int start = PayloadHelper.decodeInt(data, 0);
+      assertTrue(start + " does not equal: " + tok.startOffset(), start == tok.startOffset());
+      int end = PayloadHelper.decodeInt(data, 4);
+      assertTrue(end + " does not equal: " + tok.endOffset(), end == tok.endOffset());
+      count++;
     }
+    assertTrue(count + " does not equal: " + 10, count == 10);
 
-    public Token next(Token result) throws IOException {
-      result = input.next(result);
-      if (result != null && result.termText().equals("dogs")) {
-        result.setType("D");
-      }
-      return result;
-    }
   }
+
 
 }



Mime
View raw message