lucene-java-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From uschind...@apache.org
Subject svn commit: r890791 - in /lucene/java/trunk/contrib: CHANGES.txt analyzers/common/src/java/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilter.java analyzers/common/src/java/org/apache/lucene/analysis/payloads/IdentityEncoder.java
Date Tue, 15 Dec 2009 13:27:28 GMT
Author: uschindler
Date: Tue Dec 15 13:27:27 2009
New Revision: 890791

URL: http://svn.apache.org/viewvc?rev=890791&view=rev
Log:
LUCENE-2157: DelimitedPayloadTokenFilter no longer copies the buffer over itself, instead
it sets the length to the offset of the delimiter. Also optimizes logic and IdentityEncoder
to use NIO.

Modified:
    lucene/java/trunk/contrib/CHANGES.txt
    lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilter.java
    lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/payloads/IdentityEncoder.java

Modified: lucene/java/trunk/contrib/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/CHANGES.txt?rev=890791&r1=890790&r2=890791&view=diff
==============================================================================
--- lucene/java/trunk/contrib/CHANGES.txt (original)
+++ lucene/java/trunk/contrib/CHANGES.txt Tue Dec 15 13:27:27 2009
@@ -65,6 +65,12 @@
    into core, and moved the ICU-based collation support into contrib/icu.  
    (Robert Muir)
 
+Optimizations
+
+ * LUCENE-2157: DelimitedPayloadTokenFilter no longer copies the buffer
+   over itself. Instead it sets only the length. This patch also optimizes
+   the logic of the filter and uses NIO for IdentityEncoder. (Uwe Schindler)
+
 Test Cases
 
  * LUCENE-2115: Cutover contrib tests to use Java5 generics.  (Kay Kay

Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilter.java?rev=890791&r1=890790&r2=890791&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilter.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilter.java Tue Dec 15 13:27:27 2009
@@ -61,26 +61,19 @@
 
   @Override
   public boolean incrementToken() throws IOException {
-    boolean result = false;
     if (input.incrementToken()) {
       final char[] buffer = termAtt.termBuffer();
       final int length = termAtt.termLength();
-      //look for the delimiter
-      boolean seen = false;
       for (int i = 0; i < length; i++) {
         if (buffer[i] == delimiter) {
-          termAtt.setTermBuffer(buffer, 0, i);
           payAtt.setPayload(encoder.encode(buffer, i + 1, (length - (i + 1))));
-          seen = true;
-          break;//at this point, we know the whole piece, so we can exit.  If we don't see the delimiter, then the termAtt is the same
+          termAtt.setTermLength(i); // simply set a new length
+          return true;
         }
       }
-      if (seen == false) {
-        //no delimiter
-        payAtt.setPayload(null);
-      }
-      result = true;
-    }
-    return result;
+      // we have not seen the delimiter
+      payAtt.setPayload(null);
+      return true;
+    } else return false;
   }
 }

Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/payloads/IdentityEncoder.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/payloads/IdentityEncoder.java?rev=890791&r1=890790&r2=890791&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/payloads/IdentityEncoder.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/payloads/IdentityEncoder.java Tue Dec 15 13:27:27 2009
@@ -18,9 +18,9 @@
 
 import org.apache.lucene.index.Payload;
 
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
 import java.nio.charset.Charset;
-import java.util.Arrays;
-import java.io.UnsupportedEncodingException;
 
 
 /**
@@ -30,28 +30,30 @@
 public class IdentityEncoder extends AbstractEncoder implements PayloadEncoder{
 
   protected Charset charset = Charset.forName("UTF-8");
-  protected String charsetName = "UTF-8";  //argh, stupid 1.4
+  
+  /** @deprecated This field is no longer used. Use {@link #charset} instead. */
+  @Deprecated
+  protected String charsetName = charset.name();
 
   public IdentityEncoder() {
   }
 
   public IdentityEncoder(Charset charset) {
     this.charset = charset;
+    // @deprecated, remove this in 4.0:
     charsetName = charset.name();
   }
 
 
   public Payload encode(char[] buffer, int offset, int length) {
-    //what's the most efficient way to get a byte [] from a char[] array
-    //Do we have to go through String?
-    String tmp = new String(buffer, offset, length);
-    Payload result = null;//Can we avoid allocating by knowing where using the new API?
-    try {
-      result = new Payload(tmp.getBytes(charsetName));
-    } catch (UnsupportedEncodingException e) {
-      //should never hit this, since we get the name from the Charset
+    final ByteBuffer bb = charset.encode(CharBuffer.wrap(buffer, offset, length));
+    if (bb.hasArray()) {
+      return new Payload(bb.array(), bb.arrayOffset() + bb.position(), bb.remaining());
+    } else {
+      // normally it should always have an array, but who knows?
+      final byte[] b = new byte[bb.remaining()];
+      bb.get(b);
+      return new Payload(b);
     }
-
-    return result;
   }
 }



Mime
View raw message