ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From stevenbeth...@apache.org
Subject svn commit: r1497555 [1/2] - in /ctakes/trunk/ctakes-relation-extractor: ./ desc/analysis_engine/ src/main/java/org/apache/ctakes/relationextractor/ae/ src/main/java/org/apache/ctakes/relationextractor/cr/ src/main/java/org/apache/ctakes/relationextrac...
Date Thu, 27 Jun 2013 20:14:43 GMT
Author: stevenbethard
Date: Thu Jun 27 20:14:42 2013
New Revision: 1497555

URL: http://svn.apache.org/r1497555
Log:
CTAKES-190 - Fixes most of the relation-extractor issues. All code now uses SHARPKnowtatorXMLReader to ensure that all annotations are loaded with their correct type system types. The relation extractor classes now produce DegreeOfTextRelation and LocationOfTextRelation objects. The modifier extractor has also been updated to work with the new types, but it is not yet able to produce specific subtypes such as BodyLateralityModifier; instead, it produces generic Modifiers with their typeID set appropriately.

Added:
    ctakes/trunk/ctakes-relation-extractor/desc/analysis_engine/LocationOfRelationExtractorAnnotator.xml
    ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/LocationOfRelationExtractorAnnotator.java
    ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/ParameterSettings.java
    ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/SHARPXMI.java
Removed:
    ctakes/trunk/ctakes-relation-extractor/desc/analysis_engine/EntityMentionPairRelationExtractorAnnotator.xml
    ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/EntityMentionPairRelationExtractorAnnotator.java
    ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/cr/
    ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/PreprocessAndWriteXmi.java
    ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/pipelines/GoldEntityAndRelationReaderPipeline.java
Modified:
    ctakes/trunk/ctakes-relation-extractor/desc/analysis_engine/RelationExtractorAggregate.xml
    ctakes/trunk/ctakes-relation-extractor/pom.xml
    ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/DegreeOfRelationExtractorAnnotator.java
    ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/ModifierExtractorAnnotator.java
    ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/RelationExtractorAnnotator.java
    ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/data/GoldAnnotationStatsCalculator.java
    ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/ModifierExtractorEvaluation.java
    ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/RelationExtractorEvaluation.java
    ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/pipelines/RelationExtractorTrain.java
    ctakes/trunk/ctakes-relation-extractor/src/test/java/org/apache/ctakes/relationextractor/ae/RelationExtractorAnnotatorsTest.java

Added: ctakes/trunk/ctakes-relation-extractor/desc/analysis_engine/LocationOfRelationExtractorAnnotator.xml
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-relation-extractor/desc/analysis_engine/LocationOfRelationExtractorAnnotator.xml?rev=1497555&view=auto
==============================================================================
--- ctakes/trunk/ctakes-relation-extractor/desc/analysis_engine/LocationOfRelationExtractorAnnotator.xml (added)
+++ ctakes/trunk/ctakes-relation-extractor/desc/analysis_engine/LocationOfRelationExtractorAnnotator.xml Thu Jun 27 20:14:42 2013
@@ -0,0 +1,86 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
+    <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+    <primitive>true</primitive>
+    <annotatorImplementationName>org.apache.ctakes.relationextractor.ae.LocationOfRelationExtractorAnnotator</annotatorImplementationName>
+    <analysisEngineMetaData>
+        <name>LocationOfRelationExtractorAnnotator</name>
+        <description>Descriptor automatically generated by uimaFIT</description>
+        <version>unknown</version>
+        <vendor>org.apache.ctakes.relationextractor.ae</vendor>
+        <configurationParameters>
+            <configurationParameter>
+                <name>ProbabilityOfKeepingANegativeExample</name>
+                <description>probability that a negative example should be retained for training</description>
+                <type>Float</type>
+                <multiValued>false</multiValued>
+                <mandatory>false</mandatory>
+            </configurationParameter>
+            <configurationParameter>
+                <name>org.cleartk.classifier.CleartkAnnotator.classifierFactoryClassName</name>
+                <description>provides the full name of the ClassifierFactory class to be used.</description>
+                <type>String</type>
+                <multiValued>false</multiValued>
+                <mandatory>false</mandatory>
+            </configurationParameter>
+            <configurationParameter>
+                <name>org.cleartk.classifier.CleartkAnnotator.dataWriterFactoryClassName</name>
+                <description>provides the full name of the DataWriterFactory class to be used.</description>
+                <type>String</type>
+                <multiValued>false</multiValued>
+                <mandatory>false</mandatory>
+            </configurationParameter>
+            <configurationParameter>
+                <name>org.cleartk.classifier.CleartkAnnotator.isTraining</name>
+                <description>determines whether this annotator is writing training data or using a classifier to annotate. Normally inferred automatically based on whether or not a DataWriterFactory class has been set.</description>
+                <type>Boolean</type>
+                <multiValued>false</multiValued>
+                <mandatory>false</mandatory>
+            </configurationParameter>
+            <configurationParameter>
+                <name>org.cleartk.classifier.jar.GenericJarClassifierFactory.classifierJarPath</name>
+                <type>String</type>
+                <multiValued>false</multiValued>
+                <mandatory>false</mandatory>
+            </configurationParameter>
+        </configurationParameters>
+        <configurationParameterSettings>
+            <nameValuePair>
+                <name>org.cleartk.classifier.CleartkAnnotator.classifierFactoryClassName</name>
+                <value>
+                    <string>org.cleartk.classifier.jar.JarClassifierFactory</string>
+                </value>
+            </nameValuePair>
+            <nameValuePair>
+                <name>org.cleartk.classifier.CleartkAnnotator.dataWriterFactoryClassName</name>
+                <value>
+                    <string>org.cleartk.classifier.jar.DefaultDataWriterFactory</string>
+                </value>
+            </nameValuePair>
+            <nameValuePair>
+                <name>ProbabilityOfKeepingANegativeExample</name>
+                <value>
+                    <float>1.0</float>
+                </value>
+            </nameValuePair>
+            <nameValuePair>
+                <name>org.cleartk.classifier.jar.GenericJarClassifierFactory.classifierJarPath</name>
+                <value>
+                    <string>/org/apache/ctakes/relationextractor/models/location_of/model.jar</string>
+                </value>
+            </nameValuePair>
+        </configurationParameterSettings>
+        <typeSystemDescription>
+            <imports>
+                <import name="org.apache.ctakes.typesystem.types.TypeSystem"/>
+            </imports>
+        </typeSystemDescription>
+        <fsIndexCollection/>
+        <capabilities/>
+        <operationalProperties>
+            <modifiesCas>true</modifiesCas>
+            <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
+            <outputsNewCASes>false</outputsNewCASes>
+        </operationalProperties>
+    </analysisEngineMetaData>
+</analysisEngineDescription>

Modified: ctakes/trunk/ctakes-relation-extractor/desc/analysis_engine/RelationExtractorAggregate.xml
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-relation-extractor/desc/analysis_engine/RelationExtractorAggregate.xml?rev=1497555&r1=1497554&r2=1497555&view=diff
==============================================================================
--- ctakes/trunk/ctakes-relation-extractor/desc/analysis_engine/RelationExtractorAggregate.xml (original)
+++ ctakes/trunk/ctakes-relation-extractor/desc/analysis_engine/RelationExtractorAggregate.xml Thu Jun 27 20:14:42 2013
@@ -3,25 +3,18 @@
     <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
     <primitive>false</primitive>
     <delegateAnalysisEngineSpecifiers>
-        <delegateAnalysisEngine key="EntityMentionPairRelationExtractorAnnotator">
+        <delegateAnalysisEngine key="LocationOfRelationExtractorAnnotator">
             <analysisEngineDescription>
                 <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
                 <primitive>true</primitive>
-                <annotatorImplementationName>org.apache.ctakes.relationextractor.ae.EntityMentionPairRelationExtractorAnnotator</annotatorImplementationName>
+                <annotatorImplementationName>org.apache.ctakes.relationextractor.ae.LocationOfRelationExtractorAnnotator</annotatorImplementationName>
                 <analysisEngineMetaData>
-                    <name>EntityMentionPairRelationExtractorAnnotator</name>
+                    <name>LocationOfRelationExtractorAnnotator</name>
                     <description>Descriptor automatically generated by uimaFIT</description>
                     <version>unknown</version>
                     <vendor>org.apache.ctakes.relationextractor.ae</vendor>
                     <configurationParameters>
                         <configurationParameter>
-                            <name>ClassifyBothDirections</name>
-                            <description>run the classifier in both directions, that is, classify each pair of events {X,Y} once in the order X-to-Y and once in the order Y-to-X (default: classify each pair of events {X, Y} once, giving the label 'R' if a relation exists with the order X-to-Y, and 'R-1' if a relation exists with the order Y-to-X)</description>
-                            <type>Boolean</type>
-                            <multiValued>false</multiValued>
-                            <mandatory>false</mandatory>
-                        </configurationParameter>
-                        <configurationParameter>
                             <name>ProbabilityOfKeepingANegativeExample</name>
                             <description>probability that a negative example should be retained for training</description>
                             <type>Float</type>
@@ -72,19 +65,13 @@
                         <nameValuePair>
                             <name>ProbabilityOfKeepingANegativeExample</name>
                             <value>
-                                <float>0.5</float>
+                                <float>1.0</float>
                             </value>
                         </nameValuePair>
                         <nameValuePair>
                             <name>org.cleartk.classifier.jar.GenericJarClassifierFactory.classifierJarPath</name>
                             <value>
-                                <string>/org/apache/ctakes/relationextractor/models/em_pair/model.jar</string>
-                            </value>
-                        </nameValuePair>
-                        <nameValuePair>
-                            <name>ClassifyBothDirections</name>
-                            <value>
-                                <boolean>false</boolean>
+                                <string>/org/apache/ctakes/relationextractor/models/location_of/model.jar</string>
                             </value>
                         </nameValuePair>
                     </configurationParameterSettings>
@@ -111,6 +98,9 @@
                     <delegateAnalysisEngine key="TokenizerAnnotator">
                         <import location="../../../ctakes-core/desc/analysis_engine/TokenizerAnnotator.xml"/>
                     </delegateAnalysisEngine>
+                    <delegateAnalysisEngine key="ClearNLPAE">
+                        <import location="../../../ctakes-dependency-parser/desc/analysis_engine/ClearNLPDependencyParserAE.xml"/>
+                    </delegateAnalysisEngine>
                     <delegateAnalysisEngine key="ContextDependentTokenizerAnnotator">
                         <import location="../../../ctakes-context-tokenizer/desc/analysis_engine/ContextDependentTokenizerAnnotator.xml"/>
                     </delegateAnalysisEngine>
@@ -141,9 +131,6 @@
                     <delegateAnalysisEngine key="Chunker">
                         <import location="../../../ctakes-chunker/desc/Chunker.xml"/>
                     </delegateAnalysisEngine>
-                    <delegateAnalysisEngine key="ClearNLPAE">
-                        <import location="../../../ctakes-dependency-parser/desc/analysis_engine/ClearNLPDependencyParserAE.xml"/>
-                    </delegateAnalysisEngine>
                     <delegateAnalysisEngine key="DictionaryLookupAnnotatorDB">
                         <import location="../../../ctakes-dictionary-lookup/desc/analysis_engine/DictionaryLookupAnnotatorUMLS.xml"/>
                     </delegateAnalysisEngine>
@@ -445,7 +432,7 @@
                 <node>RelationExtractorPreprocessor</node>
                 <node>ModifierExtractorAnnotator</node>
                 <node>DegreeOfRelationExtractorAnnotator</node>
-                <node>EntityMentionPairRelationExtractorAnnotator</node>
+                <node>LocationOfRelationExtractorAnnotator</node>
             </fixedFlow>
         </flowConstraints>
         <capabilities/>

Modified: ctakes/trunk/ctakes-relation-extractor/pom.xml
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-relation-extractor/pom.xml?rev=1497555&r1=1497554&r2=1497555&view=diff
==============================================================================
--- ctakes/trunk/ctakes-relation-extractor/pom.xml (original)
+++ ctakes/trunk/ctakes-relation-extractor/pom.xml Thu Jun 27 20:14:42 2013
@@ -52,31 +52,32 @@
 			<dependency>
 				<groupId>org.cleartk</groupId>
 				<artifactId>cleartk-eval</artifactId>
+				<version>0.9.2</version>
 			</dependency>
 			<dependency>
 				<groupId>org.cleartk</groupId>
 				<artifactId>cleartk-ml</artifactId>
+				<version>1.3.0</version>
 			</dependency>
 			<dependency>
 				<groupId>org.cleartk</groupId>
 				<artifactId>cleartk-ml-libsvm</artifactId>
+				<version>1.2.0</version>
 			</dependency>
 			<dependency>
 				<groupId>org.cleartk</groupId>
 				<artifactId>cleartk-ml-opennlp-maxent</artifactId>
+				<version>1.1.2</version>
 			</dependency>
 			<dependency>
 				<groupId>org.cleartk</groupId>
 				<artifactId>cleartk-util</artifactId>
+				<version>0.9.2</version>
 			</dependency>
-			<dependency>
-				<groupId>org.cleartk</groupId>
-				<artifactId>cleartk-test-util</artifactId>
-			</dependency>
-			<dependency>
-				<groupId>args4j</groupId>
-				<artifactId>args4j</artifactId>
-			</dependency>
+		<dependency>
+			<groupId>com.lexicalscope.jewelcli</groupId>
+			<artifactId>jewelcli</artifactId>
+		</dependency>
 			<dependency>
 				<groupId>commons-io</groupId>
 				<artifactId>commons-io</artifactId>

Modified: ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/DegreeOfRelationExtractorAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/DegreeOfRelationExtractorAnnotator.java?rev=1497555&r1=1497554&r2=1497555&view=diff
==============================================================================
--- ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/DegreeOfRelationExtractorAnnotator.java (original)
+++ ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/DegreeOfRelationExtractorAnnotator.java Thu Jun 27 20:14:42 2013
@@ -21,7 +21,11 @@ package org.apache.ctakes.relationextrac
 import java.util.ArrayList;
 import java.util.List;
 
-import org.apache.ctakes.typesystem.type.textsem.EntityMention;
+import org.apache.ctakes.typesystem.type.relation.BinaryTextRelation;
+import org.apache.ctakes.typesystem.type.relation.DegreeOfTextRelation;
+import org.apache.ctakes.typesystem.type.relation.RelationArgument;
+import org.apache.ctakes.typesystem.type.textsem.EventMention;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
 import org.apache.ctakes.typesystem.type.textsem.Modifier;
 import org.apache.ctakes.typesystem.type.textspan.Sentence;
 import org.apache.uima.jcas.JCas;
@@ -29,30 +33,58 @@ import org.apache.uima.jcas.tcas.Annotat
 import org.uimafit.util.JCasUtil;
 
 /**
- * Identifies Degree_Of relation between entities and modifiers
- *
+ * Identifies Degree_Of relations between {@link EventMention}s and
+ * {@link Modifier}s.
  */
 public class DegreeOfRelationExtractorAnnotator extends RelationExtractorAnnotator {
 
-	@Override
-	public List<IdentifiedAnnotationPair> getCandidateRelationArgumentPairs(
-			JCas identifiedAnnotationView, Annotation sentence) {
-		
-		List<EntityMention> entities = JCasUtil.selectCovered(identifiedAnnotationView, EntityMention.class, sentence);
-		List<Modifier> modifiers = JCasUtil.selectCovered(identifiedAnnotationView, Modifier.class, sentence);
-		
-		List<IdentifiedAnnotationPair> pairs = new ArrayList<IdentifiedAnnotationPair>();
-		for (EntityMention entity : entities) {
-			for (Modifier modifier : modifiers) {
-				pairs.add(new IdentifiedAnnotationPair(entity, modifier));
-			}
-		}
-		return pairs;
-	}
-
-	@Override
-	protected Class<? extends Annotation> getCoveringClass() {
-		// TODO Auto-generated method stub
-		return Sentence.class;
-	}
+  @Override
+  protected Class<? extends BinaryTextRelation> getRelationClass() {
+    return DegreeOfTextRelation.class;
+  }
+
+  @Override
+  public List<IdentifiedAnnotationPair> getCandidateRelationArgumentPairs(
+      JCas identifiedAnnotationView,
+      Annotation sentence) {
+
+    List<EventMention> events =
+        JCasUtil.selectCovered(identifiedAnnotationView, EventMention.class, sentence);
+    List<Modifier> modifiers =
+        JCasUtil.selectCovered(identifiedAnnotationView, Modifier.class, sentence);
+
+    List<IdentifiedAnnotationPair> pairs = new ArrayList<IdentifiedAnnotationPair>();
+    for (EventMention event : events) {
+      for (Modifier modifier : modifiers) {
+        pairs.add(new IdentifiedAnnotationPair(event, modifier));
+      }
+    }
+    return pairs;
+  }
+
+  @Override
+  protected void createRelation(
+      JCas jCas,
+      IdentifiedAnnotation arg1,
+      IdentifiedAnnotation arg2,
+      String predictedCategory) {
+    RelationArgument relArg1 = new RelationArgument(jCas);
+    relArg1.setArgument(arg1);
+    relArg1.setRole("Argument");
+    relArg1.addToIndexes();
+    RelationArgument relArg2 = new RelationArgument(jCas);
+    relArg2.setArgument(arg2);
+    relArg2.setRole("Related_to");
+    relArg2.addToIndexes();
+    DegreeOfTextRelation relation = new DegreeOfTextRelation(jCas);
+    relation.setArg1(relArg1);
+    relation.setArg2(relArg2);
+    relation.setCategory(predictedCategory);
+    relation.addToIndexes();
+  }
+
+  @Override
+  protected Class<? extends Annotation> getCoveringClass() {
+    return Sentence.class;
+  }
 }

Added: ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/LocationOfRelationExtractorAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/LocationOfRelationExtractorAnnotator.java?rev=1497555&view=auto
==============================================================================
--- ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/LocationOfRelationExtractorAnnotator.java (added)
+++ ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/LocationOfRelationExtractorAnnotator.java Thu Jun 27 20:14:42 2013
@@ -0,0 +1,90 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.relationextractor.ae;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.ctakes.typesystem.type.relation.BinaryTextRelation;
+import org.apache.ctakes.typesystem.type.relation.LocationOfTextRelation;
+import org.apache.ctakes.typesystem.type.relation.RelationArgument;
+import org.apache.ctakes.typesystem.type.textsem.AnatomicalSiteMention;
+import org.apache.ctakes.typesystem.type.textsem.EventMention;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.uimafit.util.JCasUtil;
+
+/**
+ * Identifies Location_Of relations between {@link EventMention}s and
+ * {@link AnatomicalSiteMention}s.
+ */
+public class LocationOfRelationExtractorAnnotator extends RelationExtractorAnnotator {
+
+  @Override
+  protected Class<? extends BinaryTextRelation> getRelationClass() {
+    return LocationOfTextRelation.class;
+  }
+
+  @Override
+  public List<IdentifiedAnnotationPair> getCandidateRelationArgumentPairs(
+      JCas identifiedAnnotationView,
+      Annotation sentence) {
+
+    List<EventMention> events =
+        JCasUtil.selectCovered(identifiedAnnotationView, EventMention.class, sentence);
+    List<AnatomicalSiteMention> sites =
+        JCasUtil.selectCovered(identifiedAnnotationView, AnatomicalSiteMention.class, sentence);
+
+    List<IdentifiedAnnotationPair> pairs = new ArrayList<IdentifiedAnnotationPair>();
+    for (EventMention event : events) {
+      for (AnatomicalSiteMention site : sites) {
+        pairs.add(new IdentifiedAnnotationPair(event, site));
+      }
+    }
+    return pairs;
+  }
+
+  @Override
+  protected void createRelation(
+      JCas jCas,
+      IdentifiedAnnotation arg1,
+      IdentifiedAnnotation arg2,
+      String predictedCategory) {
+    RelationArgument relArg1 = new RelationArgument(jCas);
+    relArg1.setArgument(arg1);
+    relArg1.setRole("Argument");
+    relArg1.addToIndexes();
+    RelationArgument relArg2 = new RelationArgument(jCas);
+    relArg2.setArgument(arg2);
+    relArg2.setRole("Related_to");
+    relArg2.addToIndexes();
+    LocationOfTextRelation relation = new LocationOfTextRelation(jCas);
+    relation.setArg1(relArg1);
+    relation.setArg2(relArg2);
+    relation.setCategory(predictedCategory);
+    relation.addToIndexes();
+  }
+
+  @Override
+  protected Class<? extends Annotation> getCoveringClass() {
+    return Sentence.class;
+  }
+}

Modified: ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/ModifierExtractorAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/ModifierExtractorAnnotator.java?rev=1497555&r1=1497554&r2=1497555&view=diff
==============================================================================
--- ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/ModifierExtractorAnnotator.java (original)
+++ ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/ModifierExtractorAnnotator.java Thu Jun 27 20:14:42 2013
@@ -115,6 +115,7 @@ public class ModifierExtractorAnnotator 
 
       // convert classifications to Modifiers
       if (!this.isTraining()) {
+        // TODO: don't just create Modifiers, create the XXXModifier subtypes
         this.chunking.createChunks(jCas, tokens, outcomes);
       }
     }

Modified: ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/RelationExtractorAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/RelationExtractorAnnotator.java?rev=1497555&r1=1497554&r2=1497555&view=diff
==============================================================================
--- ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/RelationExtractorAnnotator.java (original)
+++ ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/RelationExtractorAnnotator.java Thu Jun 27 20:14:42 2013
@@ -57,23 +57,24 @@ public abstract class RelationExtractorA
 
   public static final String NO_RELATION_CATEGORY = "-NONE-";
 
-  public static final String PARAM_PROBABILITY_OF_KEEPING_A_NEGATIVE_EXAMPLE = "ProbabilityOfKeepingANegativeExample";
+  public static final String PARAM_PROBABILITY_OF_KEEPING_A_NEGATIVE_EXAMPLE =
+      "ProbabilityOfKeepingANegativeExample";
 
   @ConfigurationParameter(
       name = PARAM_PROBABILITY_OF_KEEPING_A_NEGATIVE_EXAMPLE,
       mandatory = false,
       description = "probability that a negative example should be retained for training")
   protected double probabilityOfKeepingANegativeExample = 1.0;
-  
+
   protected Random coin = new Random(0);
 
   private List<RelationFeaturesExtractor> featureExtractors = this.getFeatureExtractors();
-  
+
   private Class<? extends Annotation> coveringClass = getCoveringClass();
 
   /**
-   * Defines the list of feature extractors used by the classifier.
-   * Subclasses may override this method to provide a different set of feature extractors. 
+   * Defines the list of feature extractors used by the classifier. Subclasses
+   * may override this method to provide a different set of feature extractors.
    * 
    * @return The list of feature extractors to use.
    */
@@ -84,20 +85,27 @@ public abstract class RelationExtractorA
         new PhraseChunkingExtractor(),
         new NamedEntityFeaturesExtractor(),
         new DependencyTreeFeaturesExtractor(),
-        new DependencyPathFeaturesExtractor()
-        );
+        new DependencyPathFeaturesExtractor());
   }
- 
+
+  protected Class<? extends BinaryTextRelation> getRelationClass() {
+    return BinaryTextRelation.class;
+  }
+
   /*
-   * Defines the type of annotation that the relation exists within (sentence, document, segment) 
+   * Defines the type of annotation that the relation exists within (sentence,
+   * document, segment)
    */
   protected abstract Class<? extends Annotation> getCoveringClass();
-  
+
   /**
-   * Selects the relevant mentions/annotations within a covering annotation for relation identification/extraction.
+   * Selects the relevant mentions/annotations within a covering annotation for
+   * relation identification/extraction.
    */
-  protected abstract List<IdentifiedAnnotationPair> getCandidateRelationArgumentPairs(JCas identifiedAnnotationView, Annotation coveringAnnotation);
-  
+  protected abstract List<IdentifiedAnnotationPair> getCandidateRelationArgumentPairs(
+      JCas identifiedAnnotationView,
+      Annotation coveringAnnotation);
+
   /**
    * Workaround for https://code.google.com/p/cleartk/issues/detail?id=346
    * 
@@ -107,7 +115,7 @@ public abstract class RelationExtractorA
     String modelPathParam = GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH;
     String modelPath = (String) context.getConfigParameterValue(modelPathParam);
     if (modelPath != null) {
-      URL modelClasspathURL = RelationExtractorAnnotator.class.getResource(modelPath); 
+      URL modelClasspathURL = RelationExtractorAnnotator.class.getResource(modelPath);
       if (modelClasspathURL != null) {
         UimaContextAdmin contextAdmin = (UimaContextAdmin) context;
         ConfigurationManager manager = contextAdmin.getConfigurationManager();
@@ -135,7 +143,7 @@ public abstract class RelationExtractorA
     relationLookup = new HashMap<List<Annotation>, BinaryTextRelation>();
     if (this.isTraining()) {
       relationLookup = new HashMap<List<Annotation>, BinaryTextRelation>();
-      for (BinaryTextRelation relation : JCasUtil.select(jCas, BinaryTextRelation.class)) {
+      for (BinaryTextRelation relation : JCasUtil.select(jCas, this.getRelationClass())) {
         Annotation arg1 = relation.getArg1().getArgument();
         Annotation arg2 = relation.getArg2().getArgument();
         // The key is a list of args so we can do bi-directional lookup
@@ -146,68 +154,74 @@ public abstract class RelationExtractorA
     // walk through each sentence in the text
     for (Annotation coveringAnnotation : JCasUtil.select(jCas, coveringClass)) {
 
-    	// collect all relevant relation arguments from the sentence
-    	List<IdentifiedAnnotationPair> candidatePairs = this.getCandidateRelationArgumentPairs(jCas, coveringAnnotation);
-
-    	// walk through the pairs of annotations
-    	for (IdentifiedAnnotationPair pair : candidatePairs) {
-    		IdentifiedAnnotation arg1 = pair.getArg1();
-    		IdentifiedAnnotation arg2 = pair.getArg2();
-    		// apply all the feature extractors to extract the list of features
-    		List<Feature> features = new ArrayList<Feature>();
-    		for (RelationFeaturesExtractor extractor : this.featureExtractors) {
-    			features.addAll(extractor.extract(jCas, arg1, arg2));
-    		}
-
-    		// sanity check on feature values
-    		for (Feature feature : features) {
-    			if (feature.getValue() == null) {
-    				String message = "Null value found in %s from %s";
-    				throw new IllegalArgumentException(String.format(message, feature, features));
-    			}
-    		}
-
-    		// during training, feed the features to the data writer
-    		if (this.isTraining()) {
-    			String category = this.getRelationCategory(relationLookup, arg1, arg2);
-    			if (category == null) { continue; }
-    			
-    			// create a classification instance and write it to the training data
-    			this.dataWriter.write(new Instance<String>(category, features));
-    		}
-
-    		// during classification feed the features to the classifier and create annotations
-    		else {
-    			String predictedCategory = this.classify(features); 
-
-    			// add a relation annotation if a true relation was predicted
-    			if (!predictedCategory.equals(NO_RELATION_CATEGORY)) {
-
-    				// if we predict an inverted relation, reverse the order of the arguments
-    				if (predictedCategory.endsWith("-1")) {
-    					predictedCategory = predictedCategory.substring(0, predictedCategory.length() - 2);
-    					IdentifiedAnnotation temp = arg1;
-    					arg1 = arg2;
-    					arg2 = temp;
-    				}
-
-    				createRelation(jCas, arg1, arg2, predictedCategory);
-    			}
-    		}
-    	} // end pair in pairs
+      // collect all relevant relation arguments from the sentence
+      List<IdentifiedAnnotationPair> candidatePairs =
+          this.getCandidateRelationArgumentPairs(jCas, coveringAnnotation);
+
+      // walk through the pairs of annotations
+      for (IdentifiedAnnotationPair pair : candidatePairs) {
+        IdentifiedAnnotation arg1 = pair.getArg1();
+        IdentifiedAnnotation arg2 = pair.getArg2();
+        // apply all the feature extractors to extract the list of features
+        List<Feature> features = new ArrayList<Feature>();
+        for (RelationFeaturesExtractor extractor : this.featureExtractors) {
+          features.addAll(extractor.extract(jCas, arg1, arg2));
+        }
+
+        // sanity check on feature values
+        for (Feature feature : features) {
+          if (feature.getValue() == null) {
+            String message = "Null value found in %s from %s";
+            throw new IllegalArgumentException(String.format(message, feature, features));
+          }
+        }
+
+        // during training, feed the features to the data writer
+        if (this.isTraining()) {
+          String category = this.getRelationCategory(relationLookup, arg1, arg2);
+          if (category == null) {
+            continue;
+          }
+
+          // create a classification instance and write it to the training data
+          this.dataWriter.write(new Instance<String>(category, features));
+        }
+
+        // during classification feed the features to the classifier and create
+        // annotations
+        else {
+          String predictedCategory = this.classify(features);
+
+          // add a relation annotation if a true relation was predicted
+          if (!predictedCategory.equals(NO_RELATION_CATEGORY)) {
+
+            // if we predict an inverted relation, reverse the order of the
+            // arguments
+            if (predictedCategory.endsWith("-1")) {
+              predictedCategory = predictedCategory.substring(0, predictedCategory.length() - 2);
+              IdentifiedAnnotation temp = arg1;
+              arg1 = arg2;
+              arg2 = temp;
+            }
+
+            createRelation(jCas, arg1, arg2, predictedCategory);
+          }
+        }
+      } // end pair in pairs
     } // end for(Sentence)
   }
-  
-  
+
   /**
-   * Looks up the arguments in the specified lookup table and converts the relation
-   * into a label for classification
+   * Looks up the arguments in the specified lookup table and converts the
+   * relation into a label for classification
    * 
-   * @return If this category should not be processed for training return <i>null</i>
-   *         otherwise it returns the label sent to the datawriter
+   * @return <i>null</i> if this category should not be processed for training;
+   *         otherwise the label sent to the data writer
    */
-  protected String getRelationCategory(Map<List<Annotation>, BinaryTextRelation> relationLookup,
-		  IdentifiedAnnotation arg1, IdentifiedAnnotation arg2) {
+  protected String getRelationCategory(
+      Map<List<Annotation>, BinaryTextRelation> relationLookup,
+      IdentifiedAnnotation arg1,
+      IdentifiedAnnotation arg2) {
     BinaryTextRelation relation = relationLookup.get(Arrays.asList(arg1, arg2));
     String category;
     if (relation != null) {
@@ -221,11 +235,12 @@ public abstract class RelationExtractorA
   }
 
   /**
-   * Predict an outcome given a set of features.
-   * By default, this simply delegates to the object's <code>classifier</code>.
-   * Subclasses may override this method to implement more complex classification procedures. 
+   * Predict an outcome given a set of features. By default, this simply
+   * delegates to the object's <code>classifier</code>. Subclasses may override
+   * this method to implement more complex classification procedures.
    * 
-   * @param features The features to be classified.
+   * @param features
+   *          The features to be classified.
    * @return The predicted outcome (label) for the features.
    */
   protected String classify(List<Feature> features) throws CleartkProcessingException {
@@ -233,42 +248,56 @@ public abstract class RelationExtractorA
   }
 
   /**
-   * Create a UIMA relation type based on arguments and the relation label.
-   * This allows subclasses to create/define their own types: e.g.
-   * coreference can create CoreferenceRelation instead of BinaryTextRelation
-   * @param jCas - JCas object, needed to create new UIMA types
-   * @param arg1 - First argument to relation
-   * @param arg2 - Second argument to relation
-   * @param predictedCategory - Name of relation
-   */
-  protected void createRelation(JCas jCas, IdentifiedAnnotation arg1, IdentifiedAnnotation arg2, String predictedCategory){
-		// add the relation to the CAS
-		RelationArgument relArg1 = new RelationArgument(jCas);
-		relArg1.setArgument(arg1);
-		relArg1.setRole("Argument");
-		relArg1.addToIndexes();
-		RelationArgument relArg2 = new RelationArgument(jCas);
-		relArg2.setArgument(arg2);
-		relArg2.setRole("Related_to");
-		relArg2.addToIndexes();
-		BinaryTextRelation relation = new BinaryTextRelation(jCas);
-		relation.setArg1(relArg1);
-		relation.setArg2(relArg2);
-		relation.setCategory(predictedCategory);
-		relation.addToIndexes();
+   * Create a UIMA relation type based on arguments and the relation label. This
+   * allows subclasses to create/define their own types: e.g. coreference can
+   * create CoreferenceRelation instead of BinaryTextRelation
+   * 
+   * @param jCas
+   *          - JCas object, needed to create new UIMA types
+   * @param arg1
+   *          - First argument to relation
+   * @param arg2
+   *          - Second argument to relation
+   * @param predictedCategory
+   *          - Name of relation
+   */
+  protected void createRelation(
+      JCas jCas,
+      IdentifiedAnnotation arg1,
+      IdentifiedAnnotation arg2,
+      String predictedCategory) {
+    // add the relation to the CAS
+    RelationArgument relArg1 = new RelationArgument(jCas);
+    relArg1.setArgument(arg1);
+    relArg1.setRole("Argument");
+    relArg1.addToIndexes();
+    RelationArgument relArg2 = new RelationArgument(jCas);
+    relArg2.setArgument(arg2);
+    relArg2.setRole("Related_to");
+    relArg2.addToIndexes();
+    BinaryTextRelation relation = new BinaryTextRelation(jCas);
+    relation.setArg1(relArg1);
+    relation.setArg2(relArg2);
+    relation.setCategory(predictedCategory);
+    relation.addToIndexes();
   }
-  
+
   public static class IdentifiedAnnotationPair {
-	  
-	 private final IdentifiedAnnotation arg1;
-	 private final IdentifiedAnnotation arg2;
-	 public IdentifiedAnnotationPair(IdentifiedAnnotation arg1, IdentifiedAnnotation arg2) {
-		 this.arg1 = arg1;
-		 this.arg2 = arg2;
-	 }
-	 
-	 public final IdentifiedAnnotation getArg1() { return arg1; }
-		 
-	 public final IdentifiedAnnotation getArg2() { return arg2; }
+
+    private final IdentifiedAnnotation arg1;
+    private final IdentifiedAnnotation arg2;
+
+    public IdentifiedAnnotationPair(IdentifiedAnnotation arg1, IdentifiedAnnotation arg2) {
+      this.arg1 = arg1;
+      this.arg2 = arg2;
+    }
+
+    public final IdentifiedAnnotation getArg1() {
+      return arg1;
+    }
+
+    public final IdentifiedAnnotation getArg2() {
+      return arg2;
+    }
   }
 }

Modified: ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/data/GoldAnnotationStatsCalculator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/data/GoldAnnotationStatsCalculator.java?rev=1497555&r1=1497554&r2=1497555&view=diff
==============================================================================
--- ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/data/GoldAnnotationStatsCalculator.java (original)
+++ ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/data/GoldAnnotationStatsCalculator.java Thu Jun 27 20:14:42 2013
@@ -22,7 +22,7 @@ import java.util.Collection;
 import java.util.List;
 
 import org.apache.ctakes.relationextractor.ae.DegreeOfRelationExtractorAnnotator;
-import org.apache.ctakes.relationextractor.ae.EntityMentionPairRelationExtractorAnnotator;
+import org.apache.ctakes.relationextractor.ae.LocationOfRelationExtractorAnnotator;
 import org.apache.ctakes.relationextractor.ae.RelationExtractorAnnotator.IdentifiedAnnotationPair;
 import org.apache.ctakes.typesystem.type.relation.BinaryTextRelation;
 import org.apache.ctakes.typesystem.type.syntax.BaseToken;
@@ -150,7 +150,7 @@ public class GoldAnnotationStatsCalculat
     
     for(Sentence sentence : JCasUtil.select(jCas, Sentence.class)) {
       if(targetRelationType.equals("location_of")) {
-        EntityMentionPairRelationExtractorAnnotator emPairAnnot = new EntityMentionPairRelationExtractorAnnotator();
+        LocationOfRelationExtractorAnnotator emPairAnnot = new LocationOfRelationExtractorAnnotator();
         List<IdentifiedAnnotationPair> pairs = emPairAnnot.getCandidateRelationArgumentPairs(goldView, sentence);
         entityMentionPairCount += pairs.size();
       } 
@@ -167,7 +167,7 @@ public class GoldAnnotationStatsCalculat
     for(Sentence sentence : JCasUtil.select(jCas, Sentence.class)) {
          
       if(targetRelationType.equals("location_of")) {
-        EntityMentionPairRelationExtractorAnnotator emPairAnnot = new EntityMentionPairRelationExtractorAnnotator();
+        LocationOfRelationExtractorAnnotator emPairAnnot = new LocationOfRelationExtractorAnnotator();
         List<IdentifiedAnnotationPair> pairs = emPairAnnot.getCandidateRelationArgumentPairs(goldView, sentence);
         for(IdentifiedAnnotationPair pair : pairs) {
           String type1 = getEntityType(pair.getArg1().getTypeID());

Modified: ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/ModifierExtractorEvaluation.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/ModifierExtractorEvaluation.java?rev=1497555&r1=1497554&r2=1497555&view=diff
==============================================================================
--- ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/ModifierExtractorEvaluation.java (original)
+++ ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/ModifierExtractorEvaluation.java Thu Jun 27 20:14:42 2013
@@ -19,130 +19,108 @@
 package org.apache.ctakes.relationextractor.eval;
 
 import java.io.File;
-import java.util.Arrays;
 import java.util.Collection;
 import java.util.List;
+
+import javax.annotation.Nullable;
+
+import org.apache.ctakes.relationextractor.ae.ModifierExtractorAnnotator;
+import org.apache.ctakes.relationextractor.eval.SHARPXMI.EvaluationOptions;
+import org.apache.ctakes.typesystem.type.textsem.Modifier;
 import org.apache.uima.analysis_engine.AnalysisEngine;
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
 import org.apache.uima.cas.CASException;
 import org.apache.uima.collection.CollectionReader;
 import org.apache.uima.jcas.JCas;
-import org.apache.uima.resource.ResourceInitializationException;
 import org.cleartk.classifier.jar.DefaultDataWriterFactory;
 import org.cleartk.classifier.jar.DirectoryDataWriterFactory;
 import org.cleartk.classifier.jar.GenericJarClassifierFactory;
 import org.cleartk.classifier.jar.JarClassifierBuilder;
-import org.cleartk.classifier.libsvm.LIBSVMStringOutcomeDataWriter;
+import org.cleartk.classifier.liblinear.LIBLINEARStringOutcomeDataWriter;
 import org.cleartk.eval.AnnotationStatistics;
-import org.cleartk.eval.Evaluation_ImplBase;
-import org.cleartk.util.Options_ImplBase;
-import org.kohsuke.args4j.Option;
 import org.uimafit.component.JCasAnnotator_ImplBase;
 import org.uimafit.factory.AnalysisEngineFactory;
-import org.uimafit.factory.CollectionReaderFactory;
-import org.uimafit.factory.TypeSystemDescriptionFactory;
 import org.uimafit.pipeline.JCasIterable;
 import org.uimafit.pipeline.SimplePipeline;
 import org.uimafit.util.JCasUtil;
 
-import org.apache.ctakes.relationextractor.ae.ModifierExtractorAnnotator;
-import org.apache.ctakes.typesystem.type.textsem.Modifier;
-
-public class ModifierExtractorEvaluation extends Evaluation_ImplBase<File, AnnotationStatistics<String>> {
-
-  public static class Options extends Options_ImplBase {
-    
-    @Option(
-        name = "--train-dir",
-        usage = "specify the directory contraining the XMI training files (for example, /NLP/Corpus/Relations/sharp/xmi/train)",
-        required = true)
-    public File trainDirectory;
-    
-    @Option(
-        name = "--n-folds",
-        usage = "number of folds for use in cross-validation (only used when --test-dir is not provided)")
-    public int nFolds = 10;
-    
-    @Option(
-        name = "--test-dir",
-        usage = "specify the directory contraining the XMI testing files (for example, /NLP/Corpus/Relations/sharp/xmi/dev)")
-    public File testDirectory;
-  }
+import com.google.common.base.Function;
+import com.google.common.collect.Lists;
+import com.lexicalscope.jewel.cli.CliFactory;
+
+public class ModifierExtractorEvaluation extends SHARPXMI.Evaluation_ImplBase {
+
+  public static final ParameterSettings BEST_PARAMETERS = new ParameterSettings(
+      LIBLINEARStringOutcomeDataWriter.class,
+      new String[] { "-s", "1", "-c", "0.5" });
 
   public static void main(String[] args) throws Exception {
-    Options options = new Options();
-    options.parseOptions(args);
-    List<File> trainFiles = Arrays.asList(options.trainDirectory.listFiles());
-    File modelsDir = new File("target/models/modifier");
-
-    ModifierExtractorEvaluation evaluation = new ModifierExtractorEvaluation(
-        modelsDir,
-        "-t",
-        "0",
-        "-h",
-        "0",
-        "-c",
-        "1000");
-    
-    AnnotationStatistics<String> overallStats;
-    if (options.testDirectory == null) {
-      List<AnnotationStatistics<String>> foldStats;
-      foldStats = evaluation.crossValidation(trainFiles, options.nFolds);
-      overallStats = AnnotationStatistics.addAll(foldStats);
-    } else {
-      List<File> testFiles = Arrays.asList(options.testDirectory.listFiles());
-      overallStats = evaluation.trainAndTest(trainFiles, testFiles);
+    // parse the options, validate them, and generate XMI if necessary
+    final EvaluationOptions options = CliFactory.parseArguments(EvaluationOptions.class, args);
+    SHARPXMI.validate(options);
+    SHARPXMI.generateXMI(options);
+
+    // determine the grid of parameters to search through
+    // for the full set of LIBLINEAR parameters, see:
+    // https://github.com/bwaldvogel/liblinear-java/blob/master/src/main/java/de/bwaldvogel/liblinear/Train.java
+    List<ParameterSettings> gridOfSettings = Lists.newArrayList();
+    for (int solver : new int[] { 0 /* logistic regression */, 1 /* SVM */}) {
+      for (double svmCost : new double[] { 0.01, 0.05, 0.1, 0.5, 1, 5, 10, 50, 100 }) {
+        gridOfSettings.add(new ParameterSettings(
+            LIBLINEARStringOutcomeDataWriter.class,
+            new String[] { "-s", String.valueOf(solver), "-c", String.valueOf(svmCost) }));
+      }
     }
-    System.err.println("Overall:");
-    System.err.println(overallStats);
+
+    // run the evaluation
+    SHARPXMI.evaluate(
+        options,
+        BEST_PARAMETERS,
+        gridOfSettings,
+        new Function<ParameterSettings, ModifierExtractorEvaluation>() {
+          @Override
+          public ModifierExtractorEvaluation apply(@Nullable ParameterSettings params) {
+            return new ModifierExtractorEvaluation(new File("target/models/modifier"), params);
+          }
+        });
   }
 
-  private String[] trainingArguments;
+  private ParameterSettings parameterSettings;
 
-  public ModifierExtractorEvaluation(File directory, String... trainingArguments) {
+  public ModifierExtractorEvaluation(File directory, ParameterSettings parameterSettings) {
     super(directory);
-    this.trainingArguments = trainingArguments;
-  }
-
-  @Override
-  public CollectionReader getCollectionReader(List<File> items)
-      throws ResourceInitializationException {
-    String[] paths = new String[items.size()];
-    for (int i = 0; i < paths.length; ++i) {
-      paths[i] = items.get(i).getPath();
-    }
-    return CollectionReaderFactory.createCollectionReader(
-        XMIReader.class,
-        TypeSystemDescriptionFactory.createTypeSystemDescriptionFromPath(),
-        XMIReader.PARAM_FILES,
-        paths);
+    this.parameterSettings = parameterSettings;
   }
 
   @Override
   public void train(CollectionReader collectionReader, File directory) throws Exception {
+    System.err.printf("%s: %s:\n", this.getClass().getSimpleName(), directory.getName());
+    System.err.println(this.parameterSettings);
+
     SimplePipeline.runPipeline(
         collectionReader,
         AnalysisEngineFactory.createPrimitiveDescription(OnlyGoldModifiers.class),
         ModifierExtractorAnnotator.getDescription(
             DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
-            LIBSVMStringOutcomeDataWriter.class,
+            this.parameterSettings.dataWriterClass,
             DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
             directory.getPath()));
-    JarClassifierBuilder.trainAndPackage(directory, this.trainingArguments);
+    JarClassifierBuilder.trainAndPackage(directory, this.parameterSettings.trainingArguments);
   }
 
   @Override
   protected AnnotationStatistics<String> test(CollectionReader collectionReader, File directory)
       throws Exception {
-    AnalysisEngine classifierAnnotator = AnalysisEngineFactory.createPrimitive(ModifierExtractorAnnotator.getDescription(
-        GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
-        new File(directory, "model.jar").getPath()));
+    AnalysisEngine classifierAnnotator =
+        AnalysisEngineFactory.createPrimitive(ModifierExtractorAnnotator.getDescription(
+            GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH,
+            JarClassifierBuilder.getModelJarFile(directory)));
 
     AnnotationStatistics<String> stats = new AnnotationStatistics<String>();
     for (JCas jCas : new JCasIterable(collectionReader, classifierAnnotator)) {
       JCas goldView;
       try {
-        goldView = jCas.getView(GOLD_VIEW_NAME);
+        goldView = jCas.getView(SHARPXMI.GOLD_VIEW_NAME);
       } catch (CASException e) {
         throw new AnalysisEngineProcessException(e);
       }
@@ -150,15 +128,14 @@ public class ModifierExtractorEvaluation
       Collection<Modifier> systemModifiers = JCasUtil.select(jCas, Modifier.class);
       stats.add(goldModifiers, systemModifiers);
     }
-    System.err.println(directory.getName() + ":");
-    System.err.println(stats);
+    System.err.print(stats);
+    System.err.println();
     return stats;
   }
 
-  private static final String GOLD_VIEW_NAME = "GoldView";
-
   /**
-   * Class that copies the manual {@link Modifier} annotations to the default CAS.
+   * Class that copies the manual {@link Modifier} annotations to the default
+   * CAS.
    */
   public static class OnlyGoldModifiers extends JCasAnnotator_ImplBase {
 
@@ -166,7 +143,7 @@ public class ModifierExtractorEvaluation
     public void process(JCas jCas) throws AnalysisEngineProcessException {
       JCas goldView;
       try {
-        goldView = jCas.getView(GOLD_VIEW_NAME);
+        goldView = jCas.getView(SHARPXMI.GOLD_VIEW_NAME);
       } catch (CASException e) {
         throw new AnalysisEngineProcessException(e);
       }

Added: ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/ParameterSettings.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/ParameterSettings.java?rev=1497555&view=auto
==============================================================================
--- ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/ParameterSettings.java (added)
+++ ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/ParameterSettings.java Thu Jun 27 20:14:42 2013
@@ -0,0 +1,68 @@
+package org.apache.ctakes.relationextractor.eval;
+
+import java.util.Arrays;
+
+import org.cleartk.classifier.DataWriter;
+import org.cleartk.eval.AnnotationStatistics;
+
+import com.google.common.base.Objects;
+import com.google.common.base.Objects.ToStringHelper;
+
+/**
+ * Holds a set of parameters for a relation extraction model
+ */
+public class ParameterSettings {
+
+  public Class<? extends DataWriter<String>> dataWriterClass;
+
+  public Object[] configurationParameters;
+
+  public String[] trainingArguments;
+
+  public AnnotationStatistics<String> stats;
+
+  public ParameterSettings(
+      Class<? extends DataWriter<String>> dataWriterClass,
+      Object[] additionalConfigurationParameters,
+      String[] trainingArguments) {
+    super();
+    this.dataWriterClass = dataWriterClass;
+    this.configurationParameters = additionalConfigurationParameters;
+    this.trainingArguments = trainingArguments;
+  }
+
+  public ParameterSettings(
+      Class<? extends DataWriter<String>> dataWriterClass,
+      String[] trainingArguments) {
+    this(dataWriterClass, new Object[0], trainingArguments);
+  }
+
+  @Override
+  public String toString() {
+    ToStringHelper helper = Objects.toStringHelper(this);
+    helper.add("dataWriterClass", this.dataWriterClass.getName());
+    helper.add("configurationParameters", Arrays.asList(this.configurationParameters));
+    helper.add("trainingArguments", Arrays.asList(this.trainingArguments));
+    return helper.toString();
+  }
+
+  @Override
+  public int hashCode() {
+    return Objects.hashCode(
+        this.dataWriterClass,
+        Arrays.deepHashCode(this.configurationParameters),
+        Arrays.hashCode(this.trainingArguments));
+  }
+
+  @Override
+  public boolean equals(Object obj) {
+    if (!(obj instanceof ParameterSettings)) {
+      return false;
+    }
+    ParameterSettings that = (ParameterSettings) obj;
+    return Objects.equal(this.dataWriterClass, that.dataWriterClass)
+        && Arrays.equals(this.configurationParameters, that.configurationParameters)
+        && Arrays.equals(this.trainingArguments, that.trainingArguments);
+  }
+
+}
\ No newline at end of file



Mime
View raw message