lucene-solr-commits mailing list archives

From: gsing...@apache.org
Subject: svn commit: r794669 - in /lucene/solr/trunk: example/exampledocs/ example/solr/conf/ src/java/org/apache/solr/analysis/ src/test/org/apache/solr/analysis/
Date: Thu, 16 Jul 2009 13:36:24 GMT
Author: gsingers
Date: Thu Jul 16 13:36:24 2009
New Revision: 794669

URL: http://svn.apache.org/viewvc?rev=794669&view=rev
Log:
SOLR-1285: Add DelimitedPayloadTokenFilterFactory, tests and short example

Added:
    lucene/solr/trunk/example/exampledocs/payload.xml
      - copied, changed from r794408, lucene/solr/trunk/example/exampledocs/mem.xml
    lucene/solr/trunk/src/java/org/apache/solr/analysis/DelimitedPayloadTokenFilterFactory.java
      - copied, changed from r794408, lucene/solr/trunk/src/java/org/apache/solr/analysis/ArabicLetterTokenizerFactory.java
    lucene/solr/trunk/src/test/org/apache/solr/analysis/TestDelimitedPayloadTokenFilterFactory.java
      - copied, changed from r794408, lucene/solr/trunk/src/test/org/apache/solr/analysis/TestPatternTokenizerFactory.java
Modified:
    lucene/solr/trunk/example/solr/conf/schema.xml

Copied: lucene/solr/trunk/example/exampledocs/payload.xml (from r794408, lucene/solr/trunk/example/exampledocs/mem.xml)
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/example/exampledocs/payload.xml?p2=lucene/solr/trunk/example/exampledocs/payload.xml&p1=lucene/solr/trunk/example/exampledocs/mem.xml&r1=794408&r2=794669&rev=794669&view=diff
==============================================================================
--- lucene/solr/trunk/example/exampledocs/mem.xml (original)
+++ lucene/solr/trunk/example/exampledocs/payload.xml Thu Jul 16 13:36:24 2009
@@ -17,7 +17,7 @@
 
 <add>
 <doc>
-  <field name="id">TWINX2048-3200PRO</field>
+  <field name="id">TWINX2048-3200PRO-payload</field>
   <field name="name">CORSAIR  XMS 2GB (2 x 1GB) 184-Pin DDR SDRAM Unbuffered DDR 400
(PC 3200) Dual Channel Kit System Memory - Retail</field>
   <field name="manu">Corsair Microsystems Inc.</field>
   <field name="cat">electronics</field>
@@ -26,10 +26,11 @@
   <field name="price">185</field>
   <field name="popularity">5</field>
   <field name="inStock">true</field>
+  <field name="catPay">electronics|6.0 memory|3.0</field>
 </doc>
 
 <doc>
-  <field name="id">VS1GB400C3</field>
+  <field name="id">VS1GB400C3-payload</field>
   <field name="name">CORSAIR ValueSelect 1GB 184-Pin DDR SDRAM Unbuffered DDR 400 (PC
3200) System Memory - Retail</field>
   <field name="manu">Corsair Microsystems Inc.</field>
   <field name="cat">electronics</field>
@@ -37,10 +38,11 @@
   <field name="price">74.99</field>
   <field name="popularity">7</field>
   <field name="inStock">true</field>
+  <field name="catPay">electronics|4.0 memory|2.0</field>
 </doc>
 
 <doc>
-  <field name="id">VDBDB1A16</field>
+  <field name="id">VDBDB1A16-payload</field>
   <field name="name">A-DATA V-Series 1GB 184-Pin DDR SDRAM Unbuffered DDR 400 (PC 3200)
System Memory - OEM</field>
   <field name="manu">A-DATA Technology Inc.</field>
   <field name="cat">electronics</field>
@@ -49,10 +51,7 @@
   <!-- note: price is missing on this one -->
   <field name="popularity">5</field>
   <field name="inStock">true</field>
-
+  <field name="catPay">electronics|0.9 memory|0.1</field>
 </doc>
-
-
-
 </add>
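
For illustration only (not part of this commit): the new catPay values above are whitespace-separated tokens, each followed by a pipe and a float payload. Below is a minimal sketch of what the "payloads" analyzer chain (defined in schema.xml further down) produces for one of these values, using the same Lucene 2.9-era token attribute API that the new test in this commit uses; the class name and main method are purely illustrative.

import java.io.StringReader;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.payloads.DelimitedPayloadTokenFilter;
import org.apache.lucene.analysis.payloads.FloatEncoder;
import org.apache.lucene.analysis.payloads.PayloadHelper;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;

public class CatPayDemo {
  public static void main(String[] args) throws Exception {
    // Same chain as the "payloads" fieldtype: whitespace tokenizer, then
    // split each token at '|' and store the trailing float as a payload.
    TokenStream ts = new DelimitedPayloadTokenFilter(
        new WhitespaceTokenizer(new StringReader("electronics|6.0 memory|3.0")),
        '|', new FloatEncoder());
    TermAttribute term = (TermAttribute) ts.getAttribute(TermAttribute.class);
    PayloadAttribute pay = (PayloadAttribute) ts.getAttribute(PayloadAttribute.class);
    while (ts.incrementToken()) {
      // Prints: electronics -> 6.0, then memory -> 3.0
      System.out.println(term.term() + " -> "
          + PayloadHelper.decodeFloat(pay.getPayload().getData()));
    }
  }
}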
 

Modified: lucene/solr/trunk/example/solr/conf/schema.xml
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/example/solr/conf/schema.xml?rev=794669&r1=794668&r2=794669&view=diff
==============================================================================
--- lucene/solr/trunk/example/solr/conf/schema.xml (original)
+++ lucene/solr/trunk/example/solr/conf/schema.xml Thu Jul 16 13:36:24 2009
@@ -300,7 +300,22 @@
         <tokenizer class="solr.StandardTokenizerFactory"/>
         <filter class="solr.DoubleMetaphoneFilterFactory" inject="false"/>
       </analyzer>
-    </fieldtype> 
+    </fieldtype>
+
+    <fieldtype name="payloads" stored="false" indexed="true" class="solr.TextField" >
+      <analyzer>
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <!--
+         Attributes:
+         delimiter - a one character delimiter - will throw an exception if the delim is more than one char.  Default is | (pipe)
+         encoder - Values: float -> org.apache.lucene.analysis.payloads.FloatEncoder, integer -> o.a.l.a.p.IntegerEncoder
+              identity -> o.a.l.a.p.IdentityEncoder, Fully Qualified class name implementing PayloadEncoder
+              Encoder must have a no arg constructor.
+         -->
+        <filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="float"/>
+      </analyzer>
+    </fieldtype>
+
     
 
    <!-- since fields of this type are by default not stored or indexed, any data added to 
@@ -366,6 +381,8 @@
         results by manufacturer.  copied from "manu" via copyField -->
    <field name="manu_exact" type="string" indexed="true" stored="false"/>
 
+   <field name="catPay" type="payloads" indexed="true" stored="true"/>
+
    <!-- Here, default is used to create a "timestamp" field indicating
         When each document was indexed.
      -->
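
The comment in the new fieldtype documents two attributes, delimiter and encoder, which map directly onto the arguments of DelimitedPayloadTokenFilter (see the factory source below). As a rough sketch, not part of this commit, a fieldtype configured with delimiter="*" and encoder="integer" would behave like the following; PayloadHelper.decodeInt taking an explicit byte offset is assumed here as the integer counterpart of the decodeFloat call used in the tests.

import java.io.StringReader;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.payloads.DelimitedPayloadTokenFilter;
import org.apache.lucene.analysis.payloads.IntegerEncoder;
import org.apache.lucene.analysis.payloads.PayloadHelper;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;

public class CustomDelimiterDemo {
  public static void main(String[] args) throws Exception {
    // Rough equivalent of:
    //   <filter class="solr.DelimitedPayloadTokenFilterFactory"
    //           delimiter="*" encoder="integer"/>
    TokenStream ts = new DelimitedPayloadTokenFilter(
        new WhitespaceTokenizer(new StringReader("electronics*6 memory*3")),
        '*', new IntegerEncoder());
    TermAttribute term = (TermAttribute) ts.getAttribute(TermAttribute.class);
    PayloadAttribute pay = (PayloadAttribute) ts.getAttribute(PayloadAttribute.class);
    while (ts.incrementToken()) {
      // Prints: electronics -> 6, then memory -> 3
      System.out.println(term.term() + " -> "
          + PayloadHelper.decodeInt(pay.getPayload().getData(), 0));
    }
  }
}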

Copied: lucene/solr/trunk/src/java/org/apache/solr/analysis/DelimitedPayloadTokenFilterFactory.java (from r794408, lucene/solr/trunk/src/java/org/apache/solr/analysis/ArabicLetterTokenizerFactory.java)
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/analysis/DelimitedPayloadTokenFilterFactory.java?p2=lucene/solr/trunk/src/java/org/apache/solr/analysis/DelimitedPayloadTokenFilterFactory.java&p1=lucene/solr/trunk/src/java/org/apache/solr/analysis/ArabicLetterTokenizerFactory.java&r1=794408&r2=794669&rev=794669&view=diff
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/analysis/ArabicLetterTokenizerFactory.java (original)
+++ lucene/solr/trunk/src/java/org/apache/solr/analysis/DelimitedPayloadTokenFilterFactory.java Thu Jul 16 13:36:24 2009
@@ -17,18 +17,57 @@
  */
 
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.ar.ArabicLetterTokenizer;
+import org.apache.lucene.analysis.payloads.DelimitedPayloadTokenFilter;
+import org.apache.lucene.analysis.payloads.PayloadEncoder;
+import org.apache.lucene.analysis.payloads.FloatEncoder;
+import org.apache.lucene.analysis.payloads.IntegerEncoder;
+import org.apache.lucene.analysis.payloads.IdentityEncoder;
+import org.apache.solr.common.ResourceLoader;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.util.plugin.ResourceLoaderAware;
 
-import java.io.Reader;
+import java.util.Map;
 
 
 /**
  *
  *
  **/
-public class ArabicLetterTokenizerFactory extends BaseTokenizerFactory{
+public class DelimitedPayloadTokenFilterFactory extends BaseTokenFilterFactory implements ResourceLoaderAware {
+  public static final String ENCODER_ATTR = "encoder";
+  public static final String DELIMITER_ATTR = "delimiter";
 
-  public ArabicLetterTokenizer create(Reader input) {
-    return new ArabicLetterTokenizer(input);
+  private PayloadEncoder encoder;
+  private char delimiter = '|';
+
+  public DelimitedPayloadTokenFilter create(TokenStream input) {
+    return new DelimitedPayloadTokenFilter(input, delimiter, encoder);
+  }
+
+  @Override
+  public void init(Map<String, String> args) {
+    super.init(args);
+  }
+
+  public void inform(ResourceLoader loader) {
+    String encoderClass = args.get(ENCODER_ATTR);
+    if (encoderClass.equals("float")){
+      encoder = new FloatEncoder();
+    } else if (encoderClass.equals("integer")){
+      encoder = new IntegerEncoder();
+    } else if (encoderClass.equals("identity")){
+      encoder = new IdentityEncoder();
+    } else {
+      encoder = (PayloadEncoder) loader.newInstance(encoderClass);
+    }
+
+    String delim = args.get(DELIMITER_ATTR);
+    if (delim != null){
+      if (delim.length() == 1) {
+        delimiter = delim.charAt(0);
+      } else{
+        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Delimiter must be one character only");
+      }
+    }
   }
-}
+}
\ No newline at end of file
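
The inform method above recognizes the three built-in names and otherwise hands the value to loader.newInstance, so any encoder with a no-arg constructor can be plugged in by its fully qualified class name. A hypothetical example (the class and package below are illustrative, not part of Lucene or this commit), built on the Lucene 2.9-era AbstractEncoder and Payload classes that the built-in encoders are assumed to use:

package com.example;  // hypothetical package

import org.apache.lucene.analysis.payloads.AbstractEncoder;
import org.apache.lucene.analysis.payloads.PayloadEncoder;
import org.apache.lucene.analysis.payloads.PayloadHelper;
import org.apache.lucene.index.Payload;

/**
 * Hypothetical encoder: parses the text after the delimiter as a float and
 * stores half of it, e.g. "memory|3.0" is indexed with payload 1.5.
 * It would be selected in schema.xml with encoder="com.example.HalvingFloatEncoder".
 */
public class HalvingFloatEncoder extends AbstractEncoder implements PayloadEncoder {
  public Payload encode(char[] buffer, int offset, int length) {
    // Text between the delimiter and the end of the token, parsed as a float.
    float value = Float.parseFloat(new String(buffer, offset, length));
    return new Payload(PayloadHelper.encodeFloat(value / 2));
  }
}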

Copied: lucene/solr/trunk/src/test/org/apache/solr/analysis/TestDelimitedPayloadTokenFilterFactory.java (from r794408, lucene/solr/trunk/src/test/org/apache/solr/analysis/TestPatternTokenizerFactory.java)
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/test/org/apache/solr/analysis/TestDelimitedPayloadTokenFilterFactory.java?p2=lucene/solr/trunk/src/test/org/apache/solr/analysis/TestDelimitedPayloadTokenFilterFactory.java&p1=lucene/solr/trunk/src/test/org/apache/solr/analysis/TestPatternTokenizerFactory.java&r1=794408&r2=794669&rev=794669&view=diff
==============================================================================
--- lucene/solr/trunk/src/test/org/apache/solr/analysis/TestPatternTokenizerFactory.java (original)
+++ lucene/solr/trunk/src/test/org/apache/solr/analysis/TestDelimitedPayloadTokenFilterFactory.java Thu Jul 16 13:36:24 2009
@@ -25,49 +25,68 @@
 
 import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.WhitespaceTokenizer;
+import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
+import org.apache.lucene.analysis.payloads.FloatEncoder;
+import org.apache.lucene.analysis.payloads.DelimitedPayloadTokenFilter;
+import org.apache.lucene.analysis.payloads.PayloadHelper;
+import org.apache.lucene.util.Attribute;
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.core.SolrResourceLoader;
+import org.apache.solr.common.ResourceLoader;
+import org.apache.solr.util.AbstractSolrTestCase;
 
-public class TestPatternTokenizerFactory extends AnalysisTestCase 
-{
-	public void testSplitting() throws Exception 
-  {
-    String qpattern = "\\'([^\\']+)\\'"; // get stuff between "'"
-    String[][] tests = {
-      // group  pattern        input                    output
-      { "-1",   "--",          "aaa--bbb--ccc",         "aaa bbb ccc" },
-      { "-1",   ":",           "aaa:bbb:ccc",           "aaa bbb ccc" },
-      { "-1",   "\\p{Space}",  "aaa   bbb \t\tccc  ",   "aaa   bbb   ccc" },
-      { "-1",   ":",           "boo:and:foo",           "boo and foo" },
-      { "-1",   "o",           "boo:and:foo",           "b  :and:f" },
-      { "0",    ":",           "boo:and:foo",           ": :" },
-      { "0",    qpattern,      "aaa 'bbb' 'ccc'",       "'bbb' 'ccc'" },
-      { "1",    qpattern,      "aaa 'bbb' 'ccc'",       "bbb ccc" }
-    };
-    
-    
+public class TestDelimitedPayloadTokenFilterFactory extends AbstractSolrTestCase{
+
+
+  public String getSchemaFile() {
+    return "schema.xml";
+  }
+
+  public String getSolrConfigFile() {
+    return "solrconfig.xml";
+  }
+
+  public void testEncoder() throws Exception {
     Map<String,String> args = new HashMap<String, String>();
-    for( String[] test : tests ) {
-      args.put( PatternTokenizerFactory.GROUP, test[0] );
-      args.put( PatternTokenizerFactory.PATTERN, test[1] );
-
-      PatternTokenizerFactory tokenizer = new PatternTokenizerFactory();
-      tokenizer.init( args );
-      
-      TokenStream stream = tokenizer.create( new StringReader( test[2] ) );
-      String out = TestHyphenatedWordsFilter.tsToString( stream );
-      System.out.println( test[2] + " ==> " + out );
-      
-      assertEquals("pattern: "+test[2], test[3], out );
-      
-      // Make sure it is the same as if we called 'split'
-      if( "-1".equals( test[0] ) ) {
-        String[] split = test[2].split( test[1] );
-        stream = tokenizer.create( new StringReader( test[2] ) );
-        int i=0;
-        for( Token t = stream.next(); null != t; t = stream.next() ) 
-        {
-          assertEquals( "split: "+test[1] + " "+i, split[i++], new String(t.termBuffer(), 0, t.termLength()) );
-        }
-      }
-    } 
-	}
+    args.put(DelimitedPayloadTokenFilterFactory.ENCODER_ATTR, "float");
+    DelimitedPayloadTokenFilterFactory factory = new DelimitedPayloadTokenFilterFactory();
+    factory.init(args);
+    ResourceLoader loader = h.getCore().getResourceLoader();
+    factory.inform(loader);
+
+    TokenStream input = new WhitespaceTokenizer(new StringReader("the|0.1 quick|0.1 red|0.1"));
+    DelimitedPayloadTokenFilter tf = factory.create(input);
+    while (tf.incrementToken()){
+      PayloadAttribute payAttr = (PayloadAttribute) tf.getAttribute(PayloadAttribute.class);
+      assertTrue("payAttr is null and it shouldn't be", payAttr != null);
+      byte[] payData = payAttr.getPayload().getData();
+      assertTrue("payData is null and it shouldn't be", payData != null);
+      float payFloat = PayloadHelper.decodeFloat(payData);
+      assertTrue(payFloat + " does not equal: " + 0.1f, payFloat == 0.1f);
+    }
+  }
+
+  public void testDelim() throws Exception {
+    Map<String,String> args = new HashMap<String, String>();
+    args.put(DelimitedPayloadTokenFilterFactory.ENCODER_ATTR, FloatEncoder.class.getName());
+    args.put(DelimitedPayloadTokenFilterFactory.DELIMITER_ATTR, "*");
+    DelimitedPayloadTokenFilterFactory factory = new DelimitedPayloadTokenFilterFactory();
+    factory.init(args);
+    ResourceLoader loader = h.getCore().getResourceLoader();
+    factory.inform(loader);
+
+    TokenStream input = new WhitespaceTokenizer(new StringReader("the*0.1 quick*0.1 red*0.1"));
+    DelimitedPayloadTokenFilter tf = factory.create(input);
+    while (tf.incrementToken()){
+      PayloadAttribute payAttr = (PayloadAttribute) tf.getAttribute(PayloadAttribute.class);
+      assertTrue("payAttr is null and it shouldn't be", payAttr != null);
+      byte[] payData = payAttr.getPayload().getData();
+      assertTrue("payData is null and it shouldn't be", payData != null);
+      float payFloat = PayloadHelper.decodeFloat(payData);
+      assertTrue(payFloat + " does not equal: " + 0.1f, payFloat == 0.1f);
+    }
+  }
 }
+
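
The two tests above assert only the payload bytes. For completeness, here is a possible companion method, not part of this commit, that also checks the term text left after the delimiter and payload are stripped; it assumes the same test class and imports as above, plus org.apache.lucene.analysis.tokenattributes.TermAttribute.

  public void testTermsAndPayloads() throws Exception {
    Map<String,String> args = new HashMap<String, String>();
    args.put(DelimitedPayloadTokenFilterFactory.ENCODER_ATTR, "float");
    DelimitedPayloadTokenFilterFactory factory = new DelimitedPayloadTokenFilterFactory();
    factory.init(args);
    factory.inform(h.getCore().getResourceLoader());

    TokenStream input = new WhitespaceTokenizer(new StringReader("the|0.1 quick|0.2 red|0.3"));
    DelimitedPayloadTokenFilter tf = factory.create(input);
    TermAttribute termAttr = (TermAttribute) tf.getAttribute(TermAttribute.class);
    PayloadAttribute payAttr = (PayloadAttribute) tf.getAttribute(PayloadAttribute.class);

    String[] expectedTerms = {"the", "quick", "red"};
    float[] expectedPayloads = {0.1f, 0.2f, 0.3f};
    int i = 0;
    while (tf.incrementToken()) {
      // The delimiter and the payload text are stripped from the indexed term.
      assertEquals(expectedTerms[i], termAttr.term());
      assertTrue(PayloadHelper.decodeFloat(payAttr.getPayload().getData()) == expectedPayloads[i]);
      i++;
    }
    assertEquals(expectedTerms.length, i);
  }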


