lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From tomm...@apache.org
Subject svn commit: r1442111 - in /lucene/dev/branches/branch_4x: ./ dev-tools/ lucene/ lucene/analysis/ lucene/analysis/icu/src/java/org/apache/lucene/collation/ lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/ lucene/analysis/uima/src/test-file...
Date Mon, 04 Feb 2013 13:40:47 GMT
Author: tommaso
Date: Mon Feb  4 13:40:45 2013
New Revision: 1442111

URL: http://svn.apache.org/viewvc?rev=1442111&view=rev
Log:
LUCENE-4749 - merged back to branch_4x

Modified:
    lucene/dev/branches/branch_4x/   (props changed)
    lucene/dev/branches/branch_4x/dev-tools/   (props changed)
    lucene/dev/branches/branch_4x/lucene/   (props changed)
    lucene/dev/branches/branch_4x/lucene/BUILD.txt   (props changed)
    lucene/dev/branches/branch_4x/lucene/CHANGES.txt   (props changed)
    lucene/dev/branches/branch_4x/lucene/JRE_VERSION_MIGRATION.txt   (props changed)
    lucene/dev/branches/branch_4x/lucene/LICENSE.txt   (props changed)
    lucene/dev/branches/branch_4x/lucene/MIGRATE.txt   (props changed)
    lucene/dev/branches/branch_4x/lucene/NOTICE.txt   (props changed)
    lucene/dev/branches/branch_4x/lucene/README.txt   (props changed)
    lucene/dev/branches/branch_4x/lucene/SYSTEM_REQUIREMENTS.txt   (props changed)
    lucene/dev/branches/branch_4x/lucene/analysis/   (props changed)
    lucene/dev/branches/branch_4x/lucene/analysis/icu/src/java/org/apache/lucene/collation/ICUCollationKeyFilterFactory.java   (props changed)
    lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/BaseUIMATokenizer.java
    lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizer.java
    lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizerFactory.java
    lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMABaseAnalyzer.java
    lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnalyzer.java
    lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizer.java
    lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizerFactory.java
    lucene/dev/branches/branch_4x/lucene/analysis/uima/src/test-files/uima/TestEntityAnnotatorAE.xml
    lucene/dev/branches/branch_4x/lucene/analysis/uima/src/test-files/uima/TestWSTokenizerAE.xml
    lucene/dev/branches/branch_4x/lucene/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMABaseAnalyzerTest.java
    lucene/dev/branches/branch_4x/lucene/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMATypeAwareAnalyzerTest.java
    lucene/dev/branches/branch_4x/lucene/analysis/uima/src/test/org/apache/lucene/analysis/uima/an/SampleWSTokenizerAnnotator.java
    lucene/dev/branches/branch_4x/lucene/backwards/   (props changed)
    lucene/dev/branches/branch_4x/lucene/benchmark/   (props changed)
    lucene/dev/branches/branch_4x/lucene/build.xml   (props changed)
    lucene/dev/branches/branch_4x/lucene/codecs/   (props changed)
    lucene/dev/branches/branch_4x/lucene/common-build.xml   (props changed)
    lucene/dev/branches/branch_4x/lucene/core/   (props changed)
    lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java   (props changed)
    lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/index.40.cfs.zip   (props changed)
    lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/index.40.nocfs.zip   (props changed)
    lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/index.40.optimized.cfs.zip   (props changed)
    lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/index.40.optimized.nocfs.zip   (props changed)
    lucene/dev/branches/branch_4x/lucene/demo/   (props changed)
    lucene/dev/branches/branch_4x/lucene/facet/   (props changed)
    lucene/dev/branches/branch_4x/lucene/grouping/   (props changed)
    lucene/dev/branches/branch_4x/lucene/highlighter/   (props changed)
    lucene/dev/branches/branch_4x/lucene/ivy-settings.xml   (props changed)
    lucene/dev/branches/branch_4x/lucene/join/   (props changed)
    lucene/dev/branches/branch_4x/lucene/licenses/   (props changed)
    lucene/dev/branches/branch_4x/lucene/memory/   (props changed)
    lucene/dev/branches/branch_4x/lucene/misc/   (props changed)
    lucene/dev/branches/branch_4x/lucene/module-build.xml   (props changed)
    lucene/dev/branches/branch_4x/lucene/queries/   (props changed)
    lucene/dev/branches/branch_4x/lucene/queryparser/   (props changed)
    lucene/dev/branches/branch_4x/lucene/sandbox/   (props changed)
    lucene/dev/branches/branch_4x/lucene/site/   (props changed)
    lucene/dev/branches/branch_4x/lucene/spatial/   (props changed)
    lucene/dev/branches/branch_4x/lucene/suggest/   (props changed)
    lucene/dev/branches/branch_4x/lucene/test-framework/   (props changed)
    lucene/dev/branches/branch_4x/lucene/tools/   (props changed)
    lucene/dev/branches/branch_4x/solr/   (props changed)
    lucene/dev/branches/branch_4x/solr/CHANGES.txt   (props changed)
    lucene/dev/branches/branch_4x/solr/LICENSE.txt   (props changed)
    lucene/dev/branches/branch_4x/solr/NOTICE.txt   (props changed)
    lucene/dev/branches/branch_4x/solr/README.txt   (props changed)
    lucene/dev/branches/branch_4x/solr/SYSTEM_REQUIREMENTS.txt   (props changed)
    lucene/dev/branches/branch_4x/solr/build.xml   (props changed)
    lucene/dev/branches/branch_4x/solr/cloud-dev/   (props changed)
    lucene/dev/branches/branch_4x/solr/common-build.xml   (props changed)
    lucene/dev/branches/branch_4x/solr/contrib/   (props changed)
    lucene/dev/branches/branch_4x/solr/core/   (props changed)
    lucene/dev/branches/branch_4x/solr/example/   (props changed)
    lucene/dev/branches/branch_4x/solr/licenses/   (props changed)
    lucene/dev/branches/branch_4x/solr/licenses/httpclient-LICENSE-ASL.txt   (props changed)
    lucene/dev/branches/branch_4x/solr/licenses/httpclient-NOTICE.txt   (props changed)
    lucene/dev/branches/branch_4x/solr/licenses/httpcore-LICENSE-ASL.txt   (props changed)
    lucene/dev/branches/branch_4x/solr/licenses/httpcore-NOTICE.txt   (props changed)
    lucene/dev/branches/branch_4x/solr/licenses/httpmime-LICENSE-ASL.txt   (props changed)
    lucene/dev/branches/branch_4x/solr/licenses/httpmime-NOTICE.txt   (props changed)
    lucene/dev/branches/branch_4x/solr/scripts/   (props changed)
    lucene/dev/branches/branch_4x/solr/site/   (props changed)
    lucene/dev/branches/branch_4x/solr/solrj/   (props changed)
    lucene/dev/branches/branch_4x/solr/test-framework/   (props changed)
    lucene/dev/branches/branch_4x/solr/testlogging.properties   (props changed)
    lucene/dev/branches/branch_4x/solr/webapp/   (props changed)

Modified: lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/BaseUIMATokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/BaseUIMATokenizer.java?rev=1442111&r1=1442110&r2=1442111&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/BaseUIMATokenizer.java
(original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/BaseUIMATokenizer.java
Mon Feb  4 13:40:45 2013
@@ -28,6 +28,8 @@ import org.apache.uima.resource.Resource
 
 import java.io.IOException;
 import java.io.Reader;
+import java.util.HashMap;
+import java.util.Map;
 
 /**
  * Abstract base implementation of a {@link Tokenizer} which is able to analyze the given input with a
@@ -39,10 +41,10 @@ public abstract class BaseUIMATokenizer 
   protected final AnalysisEngine ae;
   protected final CAS cas;
 
-  protected BaseUIMATokenizer(Reader reader, String descriptorPath) {
+  protected BaseUIMATokenizer(Reader reader, String descriptorPath, Map<String, Object> configurationParameters) {
     super(reader);
     try {
-      ae = AEProviderFactory.getInstance().getAEProvider(descriptorPath).getAE();
+      ae = AEProviderFactory.getInstance().getAEProvider(null, descriptorPath, configurationParameters).getAE();
       cas = ae.newCAS();
     } catch (ResourceInitializationException e) {
       throw new RuntimeException(e);

Modified: lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizer.java?rev=1442111&r1=1442110&r2=1442111&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizer.java
(original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizer.java
Mon Feb  4 13:40:45 2013
@@ -26,6 +26,7 @@ import org.apache.uima.cas.text.Annotati
 
 import java.io.IOException;
 import java.io.Reader;
+import java.util.Map;
 
 /**
  * a {@link Tokenizer} which creates tokens from UIMA Annotations
@@ -40,8 +41,8 @@ public final class UIMAAnnotationsTokeni
 
   private int finalOffset = 0;
 
-  public UIMAAnnotationsTokenizer(String descriptorPath, String tokenType, Reader input) {
-    super(input, descriptorPath);
+  public UIMAAnnotationsTokenizer(String descriptorPath, String tokenType, Map<String, Object> configurationParameters, Reader input) {
+    super(input, descriptorPath, configurationParameters);
     this.tokenTypeString = tokenType;
     this.termAttr = addAttribute(CharTermAttribute.class);
     this.offsetAttr = addAttribute(OffsetAttribute.class);

Modified: lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizerFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizerFactory.java?rev=1442111&r1=1442110&r2=1442111&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizerFactory.java
(original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizerFactory.java
Mon Feb  4 13:40:45 2013
@@ -22,6 +22,7 @@ import org.apache.lucene.analysis.util.T
 import org.apache.lucene.analysis.uima.UIMAAnnotationsTokenizer;
 
 import java.io.Reader;
+import java.util.HashMap;
 import java.util.Map;
 
 /**
@@ -31,19 +32,29 @@ public class UIMAAnnotationsTokenizerFac
 
   private String descriptorPath;
   private String tokenType;
+  private Map<String, Object> configurationParameters;
 
   @Override
   public void init(Map<String, String> args) {
     super.init(args);
-    descriptorPath = args.get("descriptorPath");
-    tokenType = args.get("tokenType");
-    if (descriptorPath == null || tokenType == null) {
-      throw new IllegalArgumentException("Both descriptorPath and tokenType are mandatory");
+    configurationParameters = new HashMap<String, Object>();
+    for (String k : args.keySet()) {
+      if (k.equals("tokenType")) {
+        tokenType = args.get("tokenType");
+      } else if (k.equals("descriptorPath")) {
+        descriptorPath = args.get("descriptorPath");
+      } else {
+        configurationParameters.put(k, args.get(k));
+      }
     }
+    if (descriptorPath == null || tokenType == null ) {
+      throw new IllegalArgumentException("descriptorPath and tokenType are mandatory");
+    }
+
   }
 
   @Override
   public Tokenizer create(Reader input) {
-    return new UIMAAnnotationsTokenizer(descriptorPath, tokenType, input);
+    return new UIMAAnnotationsTokenizer(descriptorPath, tokenType, configurationParameters, input);
   }
 }

Modified: lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMABaseAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMABaseAnalyzer.java?rev=1442111&r1=1442110&r2=1442111&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMABaseAnalyzer.java
(original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMABaseAnalyzer.java
Mon Feb  4 13:40:45 2013
@@ -20,6 +20,7 @@ package org.apache.lucene.analysis.uima;
 import org.apache.lucene.analysis.Analyzer;
 
 import java.io.Reader;
+import java.util.Map;
 
 /**
  * An {@link Analyzer} which use the {@link UIMAAnnotationsTokenizer} for creating tokens
@@ -28,15 +29,17 @@ public final class UIMABaseAnalyzer exte
 
   private final String descriptorPath;
   private final String tokenType;
+  private final Map<String, Object> configurationParameters;
 
-  public UIMABaseAnalyzer(String descriptorPath, String tokenType) {
+  public UIMABaseAnalyzer(String descriptorPath, String tokenType, Map<String, Object> configurationParameters) {
     this.descriptorPath = descriptorPath;
     this.tokenType = tokenType;
+    this.configurationParameters = configurationParameters;
   }
 
   @Override
   protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
-    return new TokenStreamComponents(new UIMAAnnotationsTokenizer(descriptorPath, tokenType, reader));
+    return new TokenStreamComponents(new UIMAAnnotationsTokenizer(descriptorPath, tokenType, configurationParameters, reader));
   }
 
 }

Modified: lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnalyzer.java?rev=1442111&r1=1442110&r2=1442111&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnalyzer.java
(original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnalyzer.java
Mon Feb  4 13:40:45 2013
@@ -20,6 +20,7 @@ package org.apache.lucene.analysis.uima;
 import org.apache.lucene.analysis.Analyzer;
 
 import java.io.Reader;
+import java.util.Map;
 
 /**
  * {@link Analyzer} which uses the {@link UIMATypeAwareAnnotationsTokenizer} for the tokenization phase
@@ -28,15 +29,17 @@ public final class UIMATypeAwareAnalyzer
   private final String descriptorPath;
   private final String tokenType;
   private final String featurePath;
+  private final Map<String, Object> configurationParameters;
 
-  public UIMATypeAwareAnalyzer(String descriptorPath, String tokenType, String featurePath) {
+  public UIMATypeAwareAnalyzer(String descriptorPath, String tokenType, String featurePath, Map<String, Object> configurationParameters) {
     this.descriptorPath = descriptorPath;
     this.tokenType = tokenType;
     this.featurePath = featurePath;
+    this.configurationParameters = configurationParameters;
   }
 
   @Override
   protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
-    return new TokenStreamComponents(new UIMATypeAwareAnnotationsTokenizer(descriptorPath, tokenType, featurePath, reader));
+    return new TokenStreamComponents(new UIMATypeAwareAnnotationsTokenizer(descriptorPath, tokenType, featurePath, configurationParameters, reader));
   }
 }

Modified: lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizer.java?rev=1442111&r1=1442110&r2=1442111&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizer.java
(original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizer.java
Mon Feb  4 13:40:45 2013
@@ -29,6 +29,7 @@ import org.apache.uima.cas.text.Annotati
 
 import java.io.IOException;
 import java.io.Reader;
+import java.util.Map;
 
 /**
  * A {@link Tokenizer} which creates token from UIMA Annotations filling also their {@link TypeAttribute} according to
@@ -50,8 +51,8 @@ public final class UIMATypeAwareAnnotati
 
   private int finalOffset = 0;
 
-  public UIMATypeAwareAnnotationsTokenizer(String descriptorPath, String tokenType, String typeAttributeFeaturePath, Reader input) {
-    super(input, descriptorPath);
+  public UIMATypeAwareAnnotationsTokenizer(String descriptorPath, String tokenType, String typeAttributeFeaturePath, Map<String, Object> configurationParameters, Reader input) {
+    super(input, descriptorPath, configurationParameters);
     this.tokenTypeString = tokenType;
     this.termAttr = addAttribute(CharTermAttribute.class);
     this.typeAttr = addAttribute(TypeAttribute.class);

Modified: lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizerFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizerFactory.java?rev=1442111&r1=1442110&r2=1442111&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizerFactory.java
(original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizerFactory.java
Mon Feb  4 13:40:45 2013
@@ -18,10 +18,10 @@ package org.apache.lucene.analysis.uima;
  */
 
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.uima.UIMATypeAwareAnnotationsTokenizer;
 import org.apache.lucene.analysis.util.TokenizerFactory;
 
 import java.io.Reader;
+import java.util.HashMap;
 import java.util.Map;
 
 /**
@@ -32,13 +32,23 @@ public class UIMATypeAwareAnnotationsTok
   private String descriptorPath;
   private String tokenType;
   private String featurePath;
+  private Map<String, Object> configurationParameters;
 
   @Override
   public void init(Map<String, String> args) {
     super.init(args);
-    descriptorPath = args.get("descriptorPath");
-    tokenType = args.get("tokenType");
-    featurePath = args.get("featurePath");
+    configurationParameters = new HashMap<String, Object>();
+    for (String k : args.keySet()) {
+      if (k.equals("featurePath")) {
+        featurePath = args.get("featurePath");
+      } else if (k.equals("tokenType")) {
+        tokenType = args.get("tokenType");
+      } else if (k.equals("descriptorPath")) {
+        descriptorPath = args.get("descriptorPath");
+      } else {
+        configurationParameters.put(k, args.get(k));
+      }
+    }
     if (descriptorPath == null || tokenType == null || featurePath == null) {
       throw new IllegalArgumentException("descriptorPath, tokenType, and featurePath are mandatory");
     }
@@ -46,6 +56,6 @@ public class UIMATypeAwareAnnotationsTok
 
   @Override
   public Tokenizer create(Reader input) {
-    return new UIMATypeAwareAnnotationsTokenizer(descriptorPath, tokenType, featurePath, input);
+    return new UIMATypeAwareAnnotationsTokenizer(descriptorPath, tokenType, featurePath, configurationParameters, input);
   }
 }

Modified: lucene/dev/branches/branch_4x/lucene/analysis/uima/src/test-files/uima/TestEntityAnnotatorAE.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/uima/src/test-files/uima/TestEntityAnnotatorAE.xml?rev=1442111&r1=1442110&r2=1442111&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/uima/src/test-files/uima/TestEntityAnnotatorAE.xml
(original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/uima/src/test-files/uima/TestEntityAnnotatorAE.xml
Mon Feb  4 13:40:45 2013
@@ -20,7 +20,7 @@
   <primitive>true</primitive>
   <annotatorImplementationName>org.apache.lucene.analysis.uima.an.SampleEntityAnnotator</annotatorImplementationName>
   <analysisEngineMetaData>
-    <name>DummyPoSTagger</name>
+    <name>EntityAnnotator</name>
     <description/>
     <version>1.0</version>
     <vendor>ASF</vendor>

Modified: lucene/dev/branches/branch_4x/lucene/analysis/uima/src/test-files/uima/TestWSTokenizerAE.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/uima/src/test-files/uima/TestWSTokenizerAE.xml?rev=1442111&r1=1442110&r2=1442111&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/uima/src/test-files/uima/TestWSTokenizerAE.xml
(original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/uima/src/test-files/uima/TestWSTokenizerAE.xml
Mon Feb  4 13:40:45 2013
@@ -20,9 +20,28 @@
   <primitive>true</primitive>
   <annotatorImplementationName>org.apache.lucene.analysis.uima.an.SampleWSTokenizerAnnotator</annotatorImplementationName>
   <analysisEngineMetaData>
-    <name>DummyPoSTagger</name>
+    <name>WSTokenizer</name>
     <version>1.0</version>
     <vendor>ASF</vendor>
+    <configurationParameters>
+      <configurationParameter>
+        <name>line-end</name>
+        <description>
+          the string used as line end
+        </description>
+        <type>String</type>
+        <multiValued>false</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+    </configurationParameters>
+    <configurationParameterSettings>
+        <nameValuePair>
+          <name>line-end</name>
+          <value>
+            <string>\n</string>
+          </value>
+        </nameValuePair>
+    </configurationParameterSettings>
     <typeSystemDescription>
       <types>
         <typeDescription>

Modified: lucene/dev/branches/branch_4x/lucene/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMABaseAnalyzerTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMABaseAnalyzerTest.java?rev=1442111&r1=1442110&r2=1442111&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMABaseAnalyzerTest.java
(original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMABaseAnalyzerTest.java
Mon Feb  4 13:40:45 2013
@@ -36,6 +36,8 @@ import org.junit.Before;
 import org.junit.Test;
 
 import java.io.StringReader;
+import java.util.HashMap;
+import java.util.Map;
 
 /**
  * Testcase for {@link UIMABaseAnalyzer}
@@ -48,7 +50,7 @@ public class UIMABaseAnalyzerTest extend
   @Before
   public void setUp() throws Exception {
     super.setUp();
-    analyzer = new UIMABaseAnalyzer("/uima/AggregateSentenceAE.xml", "org.apache.uima.TokenAnnotation");
+    analyzer = new UIMABaseAnalyzer("/uima/AggregateSentenceAE.xml", "org.apache.uima.TokenAnnotation", null);
   }
 
   @Override
@@ -120,7 +122,15 @@ public class UIMABaseAnalyzerTest extend
 
   @Test
   public void testRandomStrings() throws Exception {
-    checkRandomData(random(), new UIMABaseAnalyzer("/uima/TestAggregateSentenceAE.xml", "org.apache.lucene.uima.ts.TokenAnnotation"),
+    checkRandomData(random(), new UIMABaseAnalyzer("/uima/TestAggregateSentenceAE.xml", "org.apache.lucene.uima.ts.TokenAnnotation", null),
+        100 * RANDOM_MULTIPLIER);
+  }
+
+  @Test
+  public void testRandomStringsWithConfigurationParameters() throws Exception {
+    Map<String, Object> cp = new HashMap<String, Object>();
+    cp.put("line-end", "\r");
+    checkRandomData(random(), new UIMABaseAnalyzer("/uima/TestWSTokenizerAE.xml", "org.apache.lucene.uima.ts.TokenAnnotation", cp),
         100 * RANDOM_MULTIPLIER);
   }
 

Modified: lucene/dev/branches/branch_4x/lucene/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMATypeAwareAnalyzerTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMATypeAwareAnalyzerTest.java?rev=1442111&r1=1442110&r2=1442111&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMATypeAwareAnalyzerTest.java
(original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMATypeAwareAnalyzerTest.java
Mon Feb  4 13:40:45 2013
@@ -37,7 +37,7 @@ public class UIMATypeAwareAnalyzerTest e
   public void setUp() throws Exception {
     super.setUp();
     analyzer = new UIMATypeAwareAnalyzer("/uima/AggregateSentenceAE.xml",
-        "org.apache.uima.TokenAnnotation", "posTag");
+        "org.apache.uima.TokenAnnotation", "posTag", null);
   }
 
   @Override
@@ -63,7 +63,7 @@ public class UIMATypeAwareAnalyzerTest e
   @Test
   public void testRandomStrings() throws Exception {
     checkRandomData(random(), new UIMATypeAwareAnalyzer("/uima/TestAggregateSentenceAE.xml",
-        "org.apache.lucene.uima.ts.TokenAnnotation", "pos"), 100 * RANDOM_MULTIPLIER);
+        "org.apache.lucene.uima.ts.TokenAnnotation", "pos", null), 100 * RANDOM_MULTIPLIER);
   }
 
 }

Modified: lucene/dev/branches/branch_4x/lucene/analysis/uima/src/test/org/apache/lucene/analysis/uima/an/SampleWSTokenizerAnnotator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/uima/src/test/org/apache/lucene/analysis/uima/an/SampleWSTokenizerAnnotator.java?rev=1442111&r1=1442110&r2=1442111&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/uima/src/test/org/apache/lucene/analysis/uima/an/SampleWSTokenizerAnnotator.java
(original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/uima/src/test/org/apache/lucene/analysis/uima/an/SampleWSTokenizerAnnotator.java
Mon Feb  4 13:40:45 2013
@@ -17,11 +17,13 @@ package org.apache.lucene.analysis.uima.
  * limitations under the License.
  */
 
+import org.apache.uima.UimaContext;
 import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
 import org.apache.uima.cas.Type;
 import org.apache.uima.cas.text.AnnotationFS;
 import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
 
 /**
  * Dummy implementation of a UIMA based whitespace tokenizer
@@ -30,15 +32,21 @@ public class SampleWSTokenizerAnnotator 
 
   private final static String TOKEN_TYPE = "org.apache.lucene.uima.ts.TokenAnnotation";
   private final static String SENTENCE_TYPE = "org.apache.lucene.uima.ts.SentenceAnnotation";
-  private static final String CR = "\n";
+  private String lineEnd;
   private static final String WHITESPACE = " ";
 
   @Override
+  public void initialize(UimaContext aContext) throws ResourceInitializationException {
+    super.initialize(aContext);
+    lineEnd = String.valueOf(aContext.getConfigParameterValue("line-end"));
+  }
+
+  @Override
   public void process(JCas jCas) throws AnalysisEngineProcessException {
     Type sentenceType = jCas.getCas().getTypeSystem().getType(SENTENCE_TYPE);
     Type tokenType = jCas.getCas().getTypeSystem().getType(TOKEN_TYPE);
     int i = 0;
-    for (String sentenceString : jCas.getDocumentText().split(CR)) {
+    for (String sentenceString : jCas.getDocumentText().split(lineEnd)) {
       // add the sentence
       AnnotationFS sentenceAnnotation = jCas.getCas().createAnnotation(sentenceType, i, sentenceString.length());
       jCas.addFsToIndexes(sentenceAnnotation);



Mime
View raw message