lucene-java-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From gsing...@apache.org
Subject svn commit: r602081 - in /lucene/java/trunk/contrib/analyzers/src: java/org/apache/lucene/analysis/payloads/ java/org/apache/lucene/analysis/sinks/ test/org/apache/lucene/analysis/payloads/ test/org/apache/lucene/analysis/sinks/
Date Fri, 07 Dec 2007 12:21:50 GMT
Author: gsingers
Date: Fri Dec  7 04:21:49 2007
New Revision: 602081

URL: http://svn.apache.org/viewvc?rev=602081&view=rev
Log:
LUCENE-1077 new sinks and payloads analysis packages

Added:
    lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/
    lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilter.java
  (with props)
    lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilter.java
  (with props)
    lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/package.html
  (with props)
    lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/sinks/
    lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/sinks/DateRecognizerSinkTokenizer.java
  (with props)
    lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/sinks/TokenRangeSinkTokenizer.java
  (with props)
    lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/sinks/TokenTypeSinkTokenizer.java
  (with props)
    lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/sinks/package.html
  (with props)
    lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/payloads/
    lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilterTest.java
  (with props)
    lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilterTest.java
  (with props)
    lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/sinks/
    lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/sinks/DateRecognizerSinkTokenizerTest.java
  (with props)
    lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/sinks/TokenRangeSinkTokenizerTest.java
  (with props)
    lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/sinks/TokenTypeSinkTokenizerTest.java
  (with props)

Added: lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilter.java?rev=602081&view=auto
==============================================================================
--- lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilter.java
(added)
+++ lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilter.java
Fri Dec  7 04:21:49 2007
@@ -0,0 +1,85 @@
+package org.apache.lucene.analysis.payloads;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.index.Payload;
+
+import java.io.IOException;
+
+
+/**
+ * Assigns a payload to a token based on the {@link org.apache.lucene.analysis.Token#type()}
+ *
+ **/
+public class NumericPayloadTokenFilter extends TokenFilter {
+
+  private String typeMatch;
+  private Payload thePayload;
+
+  public NumericPayloadTokenFilter(TokenStream input, float payload, String typeMatch) {
+    super(input);
+    //Need to encode the payload
+    thePayload = new Payload(encodePayload(payload));
+    this.typeMatch = typeMatch;
+  }
+
+  public static byte[] encodePayload(float payload) {
+    byte[] result = new byte[4];
+    int tmp = Float.floatToIntBits(payload);
+    result[0] = (byte)(tmp >> 24);
+    result[1] = (byte)(tmp >> 16);
+    result[2] = (byte)(tmp >>  8);
+    result[3] = (byte) tmp;
+
+    return result;
+  }
+
+  /**
+   * @see #decodePayload(byte[], int)
+   * @see #encodePayload(float)
+   */
+  public static float decodePayload(byte [] bytes){
+    return decodePayload(bytes, 0);
+  }
+
+  /**
+   * Decode the payload that was encoded using {@link #encodePayload(float)}.
+   * NOTE: the length of the array must be at least offset + 4 long.
+   * @param bytes The bytes to decode
+   * @param offset The offset into the array.
+   * @return The float that was encoded
+   *
+   * @see #encodePayload(float) 
+   */
+  public static final float decodePayload(byte [] bytes, int offset){
+    int tmp = ((bytes[offset] & 0xFF) << 24) | ((bytes[offset + 1] & 0xFF)
<< 16)
+         | ((bytes[offset + 2] & 0xFF) <<  8) |  (bytes[offset + 3] & 0xFF);
+    return Float.intBitsToFloat(tmp);
+  }
+
+  public Token next(Token result) throws IOException {
+    result = input.next(result);
+    if (result != null && result.type().equals(typeMatch)){
+      result.setPayload(thePayload);
+    }
+    return result;
+  }
+}

Propchange: lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilter.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilter.java?rev=602081&view=auto
==============================================================================
--- lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilter.java
(added)
+++ lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilter.java
Fri Dec  7 04:21:49 2007
@@ -0,0 +1,49 @@
+package org.apache.lucene.analysis.payloads;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.index.Payload;
+
+import java.io.IOException;
+
+
+/**
+ * Makes the {@link org.apache.lucene.analysis.Token#type()} a payload.
+ *
+ * Encodes the type using {@link String#getBytes(String)} with "UTF-8" as the encoding
+ *
+ **/
+public class TypeAsPayloadTokenFilter extends TokenFilter {
+
+  public TypeAsPayloadTokenFilter(TokenStream input) {
+    super(input);
+
+  }
+
+
+  public Token next(Token result) throws IOException {
+    result = input.next(result);
+    if (result != null && result.type() != null && result.type().equals("")
== false){
+      result.setPayload(new Payload(result.type().getBytes("UTF-8")));
+    }
+    return result;
+  }
+}
\ No newline at end of file

Propchange: lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilter.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/package.html
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/package.html?rev=602081&view=auto
==============================================================================
--- lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/package.html
(added)
+++ lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/package.html
Fri Dec  7 04:21:49 2007
@@ -0,0 +1,31 @@
+<HTML>
+ <!--
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ --><HEAD>
+    <TITLE>org.apache.lucene.analysis.payloads</TITLE>
+</HEAD>
+<BODY>
+<DIV>Provides various convenience classes for creating payloads on Tokens.
+</DIV>
+<DIV>&nbsp;</DIV>
+<DIV align="center">
+Copyright &copy; 2007 <A HREF="http://www.apache.org">Apache Software Foundation</A>
+</DIV>
+</BODY>
+</HTML>
\ No newline at end of file

Propchange: lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/payloads/package.html
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/sinks/DateRecognizerSinkTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/sinks/DateRecognizerSinkTokenizer.java?rev=602081&view=auto
==============================================================================
--- lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/sinks/DateRecognizerSinkTokenizer.java
(added)
+++ lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/sinks/DateRecognizerSinkTokenizer.java
Fri Dec  7 04:21:49 2007
@@ -0,0 +1,87 @@
+package org.apache.lucene.analysis.sinks;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.SinkTokenizer;
+import org.apache.lucene.analysis.Token;
+
+import java.text.DateFormat;
+import java.text.SimpleDateFormat;
+import java.text.ParseException;
+import java.util.List;
+import java.util.Date;
+
+
+/**
+ * Sink that only accepts tokens whose term text can be parsed as a date by a
+ * {@link java.text.DateFormat}.
+ * <p/>
+ * A recognized token has its {@link org.apache.lucene.analysis.Token#type()} set to
+ * {@link #DATE_TYPE} before a clone of it is added to the sink.
+ **/
+public class DateRecognizerSinkTokenizer extends SinkTokenizer {
+  public static final String DATE_TYPE = "date";
+
+  protected DateFormat dateFormat;
+
+  /**
+   * Uses {@link java.text.SimpleDateFormat#getDateInstance()} as the {@link java.text.DateFormat}.
+   */
+  public DateRecognizerSinkTokenizer() {
+    this(null, SimpleDateFormat.getDateInstance());
+  }
+
+  /** @param dateFormat The date format used to try to parse each token */
+  public DateRecognizerSinkTokenizer(DateFormat dateFormat) {
+    this(null, dateFormat);
+  }
+
+  /**
+   * Uses {@link java.text.SimpleDateFormat#getDateInstance()} as the {@link java.text.DateFormat}.
+   * @param input Tokens that are already dates; they should be marked as type {@link #DATE_TYPE}
+   */
+  public DateRecognizerSinkTokenizer(List/*<Token>*/ input) {
+    this(input, SimpleDateFormat.getDateInstance());
+  }
+
+  /**
+   * @param input The initial list of Tokens, passed to the SinkTokenizer constructor
+   * @param dateFormat Used to try to parse each token; this class does not synchronize access to it
+   */
+  public DateRecognizerSinkTokenizer(List/*<Token>*/ input, DateFormat dateFormat) {
+    super(input);
+    this.dateFormat = dateFormat;
+  }
+
+  public void add(Token t) {
+    if (t == null) {
+      return;
+    }
+    String term = new String(t.termBuffer(), 0, t.termLength());
+    Date parsed;
+    try {
+      parsed = dateFormat.parse(term);
+    } catch (ParseException e) {
+      // Not a date: the token is left untouched and stays out of the sink.
+      return;
+    }
+    if (parsed != null) {
+      t.setType(DATE_TYPE);
+      lst.add(t.clone());
+    }
+  }
+}

Propchange: lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/sinks/DateRecognizerSinkTokenizer.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/sinks/TokenRangeSinkTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/sinks/TokenRangeSinkTokenizer.java?rev=602081&view=auto
==============================================================================
--- lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/sinks/TokenRangeSinkTokenizer.java
(added)
+++ lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/sinks/TokenRangeSinkTokenizer.java
Fri Dec  7 04:21:49 2007
@@ -0,0 +1,55 @@
+package org.apache.lucene.analysis.sinks;
+
+import org.apache.lucene.analysis.SinkTokenizer;
+import org.apache.lucene.analysis.Token;
+
+import java.io.IOException;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+/**
+ * Counts the tokens it is offered and saves only those at positions [lower, upper), counting from 0.
+ **/
+public class TokenRangeSinkTokenizer extends SinkTokenizer {
+  private int start; // position of the first token to keep
+  private int end;   // position at which keeping stops (exclusive)
+  private int seen;  // tokens offered to add() since construction or the last reset()
+
+  public TokenRangeSinkTokenizer(int lower, int upper) {
+    this.start = lower;
+    this.end = upper;
+  }
+
+  public TokenRangeSinkTokenizer(int initCap, int lower, int upper) {
+    super(initCap);
+    this.start = lower;
+    this.end = upper;
+  }
+
+  public void add(Token t) {
+    if (start <= seen && seen < end) {
+      super.add(t);
+    }
+    seen += 1;
+  }
+
+  // Restarts the count. NOTE(review): super.reset() is not invoked here - confirm that is intended.
+  public void reset() throws IOException {
+    seen = 0;
+  }
+}

Propchange: lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/sinks/TokenRangeSinkTokenizer.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/sinks/TokenTypeSinkTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/sinks/TokenTypeSinkTokenizer.java?rev=602081&view=auto
==============================================================================
--- lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/sinks/TokenTypeSinkTokenizer.java
(added)
+++ lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/sinks/TokenTypeSinkTokenizer.java
Fri Dec  7 04:21:49 2007
@@ -0,0 +1,54 @@
+package org.apache.lucene.analysis.sinks;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.SinkTokenizer;
+import org.apache.lucene.analysis.Token;
+
+import java.util.List;
+
+
+/**
+ * If the {@link org.apache.lucene.analysis.Token#type()} matches the passed in <code>typeToMatch</code>
then
+ * add it to the sink
+ *
+ **/
+public class TokenTypeSinkTokenizer extends SinkTokenizer {
+
+  private String typeToMatch;
+
+  public TokenTypeSinkTokenizer(String typeToMatch) {
+    this.typeToMatch = typeToMatch;
+  }
+
+  public TokenTypeSinkTokenizer(int initCap, String typeToMatch) {
+    super(initCap);
+    this.typeToMatch = typeToMatch;
+  }
+
+  public TokenTypeSinkTokenizer(List/*<Token>*/ input, String typeToMatch) {
+    super(input);
+    this.typeToMatch = typeToMatch;
+  }
+
+  public void add(Token t) {
+    //check to see if this is a Category
+    if (t != null && typeToMatch.equals(t.type())){
+      lst.add(t.clone());
+    }
+  }
+}

Propchange: lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/sinks/TokenTypeSinkTokenizer.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/sinks/package.html
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/sinks/package.html?rev=602081&view=auto
==============================================================================
--- lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/sinks/package.html
(added)
+++ lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/sinks/package.html
Fri Dec  7 04:21:49 2007
@@ -0,0 +1,30 @@
+<HTML>
+ <!--
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+ --><HEAD>
+    <TITLE>org.apache.lucene.analysis.sinks</TITLE>
+</HEAD>
+<BODY>
+<DIV>Implementations of the SinkTokenizer that might be useful.
+</DIV>
+<DIV>&nbsp;</DIV>
+<DIV align="center">
+Copyright &copy; 2007 <A HREF="http://www.apache.org">Apache Software Foundation</A>
+</DIV>
+</BODY>
+</HTML>
\ No newline at end of file

Propchange: lucene/java/trunk/contrib/analyzers/src/java/org/apache/lucene/analysis/sinks/package.html
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilterTest.java?rev=602081&view=auto
==============================================================================
--- lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilterTest.java
(added)
+++ lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilterTest.java
Fri Dec  7 04:21:49 2007
@@ -0,0 +1,79 @@
+package org.apache.lucene.analysis.payloads;
+
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import junit.framework.TestCase;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.WhitespaceTokenizer;
+
+import java.io.IOException;
+import java.io.StringReader;
+
+public class NumericPayloadTokenFilterTest extends TestCase {
+
+  public NumericPayloadTokenFilterTest(String s) {
+    super(s);
+  }
+
+  protected void setUp() {
+  }
+
+  protected void tearDown() {
+  }
+
+  // Only "dogs" is typed "D": it must carry the encoded payload 3; all other tokens must be "word".
+  public void test() throws IOException {
+    String test = "The quick red fox jumped over the lazy brown dogs";
+    TokenStream words = new WordTokenFilter(new WhitespaceTokenizer(new StringReader(test)));
+    NumericPayloadTokenFilter nptf = new NumericPayloadTokenFilter(words, 3, "D");
+    Token tok = new Token();
+    boolean seenDogs = false;
+    while ((tok = nptf.next(tok)) != null){
+      if (tok.termText().equals("dogs")){
+        seenDogs = true;
+        assertEquals("type of 'dogs'", "D", tok.type());
+        assertNotNull("tok.getPayload() is null and it shouldn't be", tok.getPayload());
+        byte [] bytes = tok.getPayload().getData(); // whole array is used; offset is checked to be 0 below
+        assertEquals("payload length", bytes.length, tok.getPayload().length());
+        assertEquals("payload offset", 0, tok.getPayload().getOffset());
+        float pay = NumericPayloadTokenFilter.decodePayload(bytes);
+        assertEquals("decoded payload", 3.0f, pay, 0.0f);
+      } else {
+        assertEquals("type of '" + tok.termText() + "'", "word", tok.type());
+      }
+    }
+    assertTrue("the token 'dogs' was never seen", seenDogs);
+  }
+
+  /** Sets the type of the token "dogs" to "D" and leaves every other token unchanged. */
+  private class WordTokenFilter extends TokenFilter {
+    private WordTokenFilter(TokenStream input) {
+      super(input);
+    }
+
+    public Token next(Token result) throws IOException {
+      result = input.next(result);
+      if (result != null && result.termText().equals("dogs")) {
+        result.setType("D");
+      }
+      return result;
+    }
+  }
+
+}
\ No newline at end of file

Propchange: lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilterTest.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilterTest.java?rev=602081&view=auto
==============================================================================
--- lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilterTest.java
(added)
+++ lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilterTest.java
Fri Dec  7 04:21:49 2007
@@ -0,0 +1,76 @@
+package org.apache.lucene.analysis.payloads;
+
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import junit.framework.TestCase;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.WhitespaceTokenizer;
+
+import java.io.IOException;
+import java.io.StringReader;
+
+public class TypeAsPayloadTokenFilterTest extends TestCase {
+
+  public TypeAsPayloadTokenFilterTest(String s) {
+    super(s);
+  }
+
+  protected void setUp() {
+  }
+
+  protected void tearDown() {
+  }
+
+  /**
+   * Types every token with its upper-cased first character, then checks that the payload set
+   * by the filter, decoded as UTF-8, equals that type for each of the ten tokens.
+   */
+  public void test() throws IOException {
+    String test = "The quick red fox jumped over the lazy brown dogs";
+    TokenStream typed = new WordTokenFilter(new WhitespaceTokenizer(new StringReader(test)));
+    TypeAsPayloadTokenFilter nptf = new TypeAsPayloadTokenFilter(typed);
+    Token tok = new Token();
+    int count = 0;
+    while ((tok = nptf.next(tok)) != null){
+      String expectedType = String.valueOf(Character.toUpperCase(tok.termBuffer()[0]));
+      assertEquals("token type", expectedType, tok.type());
+      assertNotNull("tok.getPayload() is null and it shouldn't be", tok.getPayload());
+      // new String(...) never yields null, so the decoded payload is compared directly.
+      String type = new String(tok.getPayload().getData(), "UTF-8");
+      assertEquals("payload decoded as UTF-8", tok.type(), type);
+      count++;
+    }
+    assertEquals("number of tokens", 10, count);
+  }
+
+  /** Sets each token's type to the upper-cased first character of its term. */
+  private class WordTokenFilter extends TokenFilter {
+    private WordTokenFilter(TokenStream input) {
+      super(input);
+    }
+
+    public Token next(Token result) throws IOException {
+      result = input.next(result);
+      if (result != null) {
+        result.setType(String.valueOf(Character.toUpperCase(result.termBuffer()[0])));
+      }
+      return result;
+    }
+  }
+}
\ No newline at end of file

Propchange: lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilterTest.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/sinks/DateRecognizerSinkTokenizerTest.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/sinks/DateRecognizerSinkTokenizerTest.java?rev=602081&view=auto
==============================================================================
--- lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/sinks/DateRecognizerSinkTokenizerTest.java
(added)
+++ lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/sinks/DateRecognizerSinkTokenizerTest.java
Fri Dec  7 04:21:49 2007
@@ -0,0 +1,60 @@
+package org.apache.lucene.analysis.sinks;
+
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import junit.framework.TestCase;
+import org.apache.lucene.analysis.TeeTokenFilter;
+import org.apache.lucene.analysis.WhitespaceTokenizer;
+import org.apache.lucene.analysis.Token;
+
+import java.io.StringReader;
+import java.io.IOException;
+import java.text.SimpleDateFormat;
+
+public class DateRecognizerSinkTokenizerTest extends TestCase {
+
+  public DateRecognizerSinkTokenizerTest(String s) {
+    super(s);
+  }
+
+  protected void setUp() {
+  }
+
+  protected void tearDown() {
+  }
+
+  // Tees 18 tokens, two of them MM/dd/yyyy dates, and expects the sink to hold exactly those two.
+  public void test() throws IOException {
+    String test = "The quick red fox jumped over the lazy brown dogs on 7/11/2006  The dogs finally reacted on 7/12/2006";
+    SimpleDateFormat format = new SimpleDateFormat("MM/dd/yyyy");
+    DateRecognizerSinkTokenizer sink = new DateRecognizerSinkTokenizer(format);
+    WhitespaceTokenizer source = new WhitespaceTokenizer(new StringReader(test));
+    TeeTokenFilter tee = new TeeTokenFilter(source, sink);
+    int count = 0;
+    for (Token tok = tee.next(); tok != null; tok = tee.next()) {
+      assertTrue("tok is null and it shouldn't be", tok != null);
+      if (tok.termBuffer()[0] == '7'){
+        String expected = DateRecognizerSinkTokenizer.DATE_TYPE;
+        assertTrue(tok.type() + " is not equal to " + expected, tok.type().equals(expected) == true);
+      }
+      count++;
+    }
+    assertTrue(count + " does not equal: " + 18, count == 18);
+    int sinkSize = sink.getTokens().size();
+    assertTrue("sink Size: " + sinkSize + " is not: " + 2, sinkSize == 2);
+  }
+}
\ No newline at end of file

Propchange: lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/sinks/DateRecognizerSinkTokenizerTest.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/sinks/TokenRangeSinkTokenizerTest.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/sinks/TokenRangeSinkTokenizerTest.java?rev=602081&view=auto
==============================================================================
--- lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/sinks/TokenRangeSinkTokenizerTest.java
(added)
+++ lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/sinks/TokenRangeSinkTokenizerTest.java
Fri Dec  7 04:21:49 2007
@@ -0,0 +1,54 @@
+package org.apache.lucene.analysis.sinks;
+
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import junit.framework.TestCase;
+import org.apache.lucene.analysis.TeeTokenFilter;
+import org.apache.lucene.analysis.WhitespaceTokenizer;
+import org.apache.lucene.analysis.Token;
+
+import java.io.StringReader;
+import java.io.IOException;
+
+public class TokenRangeSinkTokenizerTest extends TestCase {
+
+
+  public TokenRangeSinkTokenizerTest(String s) {
+    super(s);
+  }
+
+  protected void setUp() {
+  }
+
+  protected void tearDown() {
+
+  }
+
+  public void test() throws IOException {
+    TokenRangeSinkTokenizer rangeToks = new TokenRangeSinkTokenizer(2, 4);
+    String test = "The quick red fox jumped over the lazy brown dogs";
+    TeeTokenFilter tee = new TeeTokenFilter(new WhitespaceTokenizer(new StringReader(test)), rangeToks);
+    Token tok = null;
+    int count = 0;
+    while ((tok = tee.next()) != null){
+      assertTrue("tok is null and it shouldn't be", tok != null);
+      count++;
+    }
+    assertTrue(count + " does not equal: " + 10, count == 10);
+    assertTrue("rangeToks Size: " + rangeToks.getTokens().size() + " is not: " + 2, rangeToks.getTokens().size() == 2);
+  }
+}
\ No newline at end of file

Propchange: lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/sinks/TokenRangeSinkTokenizerTest.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/sinks/TokenTypeSinkTokenizerTest.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/sinks/TokenTypeSinkTokenizerTest.java?rev=602081&view=auto
==============================================================================
--- lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/sinks/TokenTypeSinkTokenizerTest.java (added)
+++ lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/sinks/TokenTypeSinkTokenizerTest.java Fri Dec  7 04:21:49 2007
@@ -0,0 +1,72 @@
+package org.apache.lucene.analysis.sinks;
+
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import junit.framework.TestCase;
+import org.apache.lucene.analysis.*;
+import org.apache.lucene.analysis.payloads.NumericPayloadTokenFilter;
+
+import java.io.IOException;
+import java.io.StringReader;
+
+public class TokenTypeSinkTokenizerTest extends TestCase {
+
+
+  public TokenTypeSinkTokenizerTest(String s) {
+    super(s);
+  }
+
+  protected void setUp() {
+  }
+
+  protected void tearDown() {
+
+  }
+
+  public void test() throws IOException {
+    TokenTypeSinkTokenizer sink = new TokenTypeSinkTokenizer("D");
+    String test = "The quick red fox jumped over the lazy brown dogs";
+
+    TeeTokenFilter ttf = new TeeTokenFilter(new WordTokenFilter(new WhitespaceTokenizer(new StringReader(test))), sink);
+    Token tok = new Token();
+    boolean seenDogs = false;
+    while ((tok = ttf.next(tok)) != null) {
+      if (tok.termText().equals("dogs")) {
+        seenDogs = true;
+        assertTrue(tok.type() + " is not equal to " + "D", tok.type().equals("D") == true);
+      } else {
+        assertTrue(tok.type() + " is not null and it should be", tok.type().equals("word"));
+      }
+    }
+    assertTrue(seenDogs + " does not equal: " + true, seenDogs == true);
+    assertTrue("sink Size: " + sink.getTokens().size() + " is not: " + 1, sink.getTokens().size() == 1);
+  }
+
+  private class WordTokenFilter extends TokenFilter {
+    private WordTokenFilter(TokenStream input) {
+      super(input);
+    }
+
+    public Token next(Token result) throws IOException {
+      result = input.next(result);
+      if (result != null && result.termText().equals("dogs")) {
+        result.setType("D");
+      }
+      return result;
+    }
+  }
+}
\ No newline at end of file

Propchange: lucene/java/trunk/contrib/analyzers/src/test/org/apache/lucene/analysis/sinks/TokenTypeSinkTokenizerTest.java
------------------------------------------------------------------------------
    svn:eol-style = native



Mime
View raw message