ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From tm...@apache.org
Subject svn commit: r1488186 - in /ctakes/sandbox/ctakes-coref-cleartk: desc/ desc/analysis_engine/ desc/analysis_engine/CoreferencePreprocessor.xml src/main/java/org/apache/ctakes/coreference/eval/EvaluationOfCoreferencePairs.java
Date Fri, 31 May 2013 12:09:28 GMT
Author: tmill
Date: Fri May 31 12:09:28 2013
New Revision: 1488186

URL: http://svn.apache.org/r1488186
Log:
Slimmed down eval file and set up to take arguments. test() is still empty.

Added:
    ctakes/sandbox/ctakes-coref-cleartk/desc/
    ctakes/sandbox/ctakes-coref-cleartk/desc/analysis_engine/
    ctakes/sandbox/ctakes-coref-cleartk/desc/analysis_engine/CoreferencePreprocessor.xml
Modified:
    ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/eval/EvaluationOfCoreferencePairs.java

Added: ctakes/sandbox/ctakes-coref-cleartk/desc/analysis_engine/CoreferencePreprocessor.xml
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-coref-cleartk/desc/analysis_engine/CoreferencePreprocessor.xml?rev=1488186&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-coref-cleartk/desc/analysis_engine/CoreferencePreprocessor.xml (added)
+++ ctakes/sandbox/ctakes-coref-cleartk/desc/analysis_engine/CoreferencePreprocessor.xml Fri May 31 12:09:28 2013
@@ -0,0 +1,113 @@
+<?xml version="1.0" encoding="UTF-8"?><analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
+  <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+  <primitive>false</primitive>
+  <delegateAnalysisEngineSpecifiers>
+    <delegateAnalysisEngine key="ConstituencyParserAnnotator">
+      <import location="../../../ctakes-constituency-parser/desc/ConstituencyParserAnnotator.xml"/>
+    </delegateAnalysisEngine>
+    <delegateAnalysisEngine key="TokenizerAnnotator">
+      <import location="../../../ctakes-core/desc/analysis_engine/TokenizerAnnotator.xml"/>
+    </delegateAnalysisEngine>
+    <delegateAnalysisEngine key="ContextDependentTokenizerAnnotator">
+      <import location="../../../ctakes-context-tokenizer/desc/analysis_engine/ContextDependentTokenizerAnnotator.xml"/>
+    </delegateAnalysisEngine>
+    <delegateAnalysisEngine key="SentenceDetectorAnnotator">
+      <import location="../../../ctakes-core/desc/analysis_engine/SentenceDetectorAnnotator.xml"/>
+    </delegateAnalysisEngine>
+    <delegateAnalysisEngine key="SimpleSegmentAnnotator">
+      <import location="../../../ctakes-core/desc/analysis_engine/SimpleSegmentAnnotator.xml"/>
+    </delegateAnalysisEngine>
+    <delegateAnalysisEngine key="POSTagger">
+      <import location="../../../ctakes-pos-tagger/desc/POSTagger.xml"/>
+    </delegateAnalysisEngine>
+    <delegateAnalysisEngine key="Chunker">
+      <import location="../../../ctakes-chunker/desc/Chunker.xml"/>
+    </delegateAnalysisEngine>
+    <delegateAnalysisEngine key="LookupWindowAnnotator">
+      <import location="../../../ctakes-clinical-pipeline/desc/analysis_engine/LookupWindowAnnotator.xml"/>
+    </delegateAnalysisEngine>
+    <delegateAnalysisEngine key="DictionaryLookupAnnotator">
+      <import location="../../../ctakes-dictionary-lookup/desc/analysis_engine/DictionaryLookupAnnotator.xml"/>
+    </delegateAnalysisEngine>
+    <delegateAnalysisEngine key="LvgAnnotator">
+      <import location="../../../ctakes-lvg/desc/analysis_engine/LvgAnnotator.xml"/>
+    </delegateAnalysisEngine>
+  </delegateAnalysisEngineSpecifiers>
+  <analysisEngineMetaData>
+    <name>CorefPreProcessor</name>
+    <description>Runs the complete pipeline for annotating clinical documents in plain text format.</description>
+    <version/>
+    <vendor/>
+    <configurationParameters searchStrategy="language_fallback">
+      <configurationParameter>
+        <name>SegmentID</name>
+        <description/>
+        <type>String</type>
+        <multiValued>false</multiValued>
+        <mandatory>false</mandatory>
+        <overrides>
+          <parameter>SimpleSegmentAnnotator/SegmentID</parameter>
+        </overrides>
+      </configurationParameter>
+      <configurationParameter>
+        <name>ChunkCreatorClass</name>
+        <type>String</type>
+        <multiValued>false</multiValued>
+        <mandatory>true</mandatory>
+        <overrides>
+          <parameter>Chunker/ChunkCreatorClass</parameter>
+        </overrides>
+      </configurationParameter>
+    </configurationParameters>
+    <configurationParameterSettings>
+      <nameValuePair>
+        <name>ChunkCreatorClass</name>
+        <value>
+          <string>org.apache.ctakes.chunker.ae.PhraseTypeChunkCreator</string>
+        </value>
+      </nameValuePair>
+    </configurationParameterSettings>
+    <flowConstraints>
+      <fixedFlow>
+        <node>SimpleSegmentAnnotator</node>
+        <node>SentenceDetectorAnnotator</node>
+        <node>TokenizerAnnotator</node>
+        <node>LvgAnnotator</node>
+        <node>ContextDependentTokenizerAnnotator</node>
+        <node>POSTagger</node>
+        <node>Chunker</node>
+        <node>LookupWindowAnnotator</node>
+        <node>DictionaryLookupAnnotator</node>
+        <node>ConstituencyParserAnnotator</node>
+      </fixedFlow>
+    </flowConstraints>
+    <typePriorities>
+      <name>Ordering</name>
+      <description>For subiterator</description>
+      <version>1.0</version>
+      <priorityList>
+        <type>org.apache.ctakes.typesystem.type.textspan.Segment</type>
+        <type>org.apache.ctakes.typesystem.type.textspan.Sentence</type>
+        <type>org.apache.ctakes.typesystem.type.syntax.BaseToken</type>
+      </priorityList>
+      <priorityList>
+        <type>org.apache.ctakes.typesystem.type.textspan.Sentence</type>
+        <type>org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation</type>
+      </priorityList>
+    </typePriorities>
+    <fsIndexCollection/>
+    <capabilities>
+      <capability>
+        <inputs/>
+        <outputs/>
+        <languagesSupported/>
+      </capability>
+    </capabilities>
+    <operationalProperties>
+      <modifiesCas>true</modifiesCas>
+      <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
+      <outputsNewCASes>false</outputsNewCASes>
+    </operationalProperties>
+  </analysisEngineMetaData>
+  <resourceManagerConfiguration/>
+</analysisEngineDescription>

Modified: ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/eval/EvaluationOfCoreferencePairs.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/eval/EvaluationOfCoreferencePairs.java?rev=1488186&r1=1488185&r2=1488186&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/eval/EvaluationOfCoreferencePairs.java (original)
+++ ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/eval/EvaluationOfCoreferencePairs.java Fri May 31 12:09:28 2013
@@ -1,73 +1,51 @@
 package org.apache.ctakes.coreference.eval;
 
 import java.io.File;
-import java.io.FileOutputStream;
-import java.io.IOException;
+import java.util.Arrays;
 import java.util.List;
 
-import org.apache.ctakes.chunker.ae.Chunker;
-import org.apache.ctakes.chunker.ae.DefaultChunkCreator;
-import org.apache.ctakes.chunker.ae.adjuster.ChunkAdjuster;
-import org.apache.ctakes.constituency.parser.ae.ConstituencyParser;
-import org.apache.ctakes.contexttokenizer.ae.ContextDependentTokenizerAnnotator;
-import org.apache.ctakes.core.ae.OverlapAnnotator;
-import org.apache.ctakes.core.ae.SentenceDetector;
-import org.apache.ctakes.core.ae.SimpleSegmentAnnotator;
-import org.apache.ctakes.core.ae.TokenizerAnnotatorPTB;
-import org.apache.ctakes.core.resource.FileLocator;
-import org.apache.ctakes.core.resource.FileResourceImpl;
-import org.apache.ctakes.core.resource.JdbcConnectionResourceImpl;
-import org.apache.ctakes.core.resource.LuceneIndexReaderResourceImpl;
-import org.apache.ctakes.core.resource.SuffixMaxentModelResourceImpl;
 import org.apache.ctakes.coreference.ae.NamedEntityCoreferenceResolver;
-import org.apache.ctakes.dictionary.lookup.ae.UmlsDictionaryLookupAnnotator;
-import org.apache.ctakes.lvg.ae.LvgAnnotator;
-import org.apache.ctakes.lvg.resource.LvgCmdApiResourceImpl;
-import org.apache.ctakes.postagger.POSTagger;
+import org.apache.ctakes.coreference.eval.PreprocessAndWriteXmi.Options;
 import org.apache.ctakes.relationextractor.eval.XMIReader;
-import org.apache.ctakes.typesystem.type.syntax.BaseToken;
 import org.apache.ctakes.typesystem.type.syntax.Chunk;
 import org.apache.ctakes.typesystem.type.textspan.LookupWindowAnnotation;
-import org.apache.ctakes.typesystem.type.textspan.Segment;
-import org.apache.ctakes.typesystem.type.textspan.Sentence;
-import org.apache.uima.UIMAException;
-import org.apache.uima.UimaContext;
-import org.apache.uima.analysis_engine.AnalysisEngineDescription;
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
-import org.apache.uima.cas.CAS;
-import org.apache.uima.cas.impl.XmiCasSerializer;
 import org.apache.uima.collection.CollectionReader;
 import org.apache.uima.jcas.JCas;
-import org.apache.uima.resource.ResourceInitializationException;
-import org.apache.uima.util.XMLSerializer;
 import org.cleartk.classifier.jar.JarClassifierBuilder;
 import org.cleartk.classifier.libsvm.LIBSVMStringOutcomeDataWriter;
 import org.cleartk.eval.AnnotationStatistics;
 import org.cleartk.eval.Evaluation_ImplBase;
+import org.cleartk.util.Options_ImplBase;
 import org.cleartk.util.ViewURIUtil;
-import org.cleartk.util.ae.UriToDocumentTextAnnotator;
+import org.kohsuke.args4j.Option;
 import org.uimafit.component.JCasAnnotator_ImplBase;
-import org.uimafit.component.ViewCreatorAnnotator;
-import org.uimafit.component.ViewTextCopierAnnotator;
-import org.uimafit.descriptor.ConfigurationParameter;
 import org.uimafit.factory.AggregateBuilder;
-import org.uimafit.factory.AnalysisEngineFactory;
 import org.uimafit.factory.CollectionReaderFactory;
-import org.uimafit.factory.ExternalResourceFactory;
-import org.uimafit.factory.TypePrioritiesFactory;
 import org.uimafit.factory.TypeSystemDescriptionFactory;
 import org.uimafit.pipeline.SimplePipeline;
-import org.uimafit.testing.util.HideOutput;
 import org.uimafit.util.JCasUtil;
-import org.xml.sax.ContentHandler;
-import org.xml.sax.SAXException;
 
 public class EvaluationOfCoreferencePairs extends
 		Evaluation_ImplBase<File, AnnotationStatistics<String>> {
+  public static class Options extends Options_ImplBase {
 
+    @Option(name = "-e", 
+        aliases = "--test", 
+        usage = "specify the directory contraining the xmis for the test partition",
+        required = true)
+    public File testDirectory;
+
+    @Option(name = "-t", 
+        aliases = "--train", 
+        usage = "specify the directory contraining the xmis for the training partition",
+        required = true)
+    public File traingDirectory;
+  }
+  
 	public static final String GOLD_VIEW_NAME = "GOLD_VIEW";
-  private boolean xmiExists = false;
-  private File xmiDirectory = null;
+//  private boolean xmiExists = false;
+//  private File xmiDirectory = null;
   
 	public EvaluationOfCoreferencePairs(File baseDirectory) {
 		super(baseDirectory);
@@ -133,6 +111,7 @@ public class EvaluationOfCoreferencePair
 //    return aggregateBuilder;
 //  }
 
+	/*
   protected AggregateBuilder getXMIWritingPreprocessorAggregateBuilder()
       throws Exception {
     AggregateBuilder aggregateBuilder = new AggregateBuilder();
@@ -372,6 +351,7 @@ public class EvaluationOfCoreferencePair
       }
     }
   }
+  */
 /*
   public static class XMIReader extends JCasAnnotator_ImplBase {
 
@@ -420,9 +400,21 @@ public class EvaluationOfCoreferencePair
 
 	/**
 	 * @param args
+	 * @throws Exception 
 	 */
-	public static void main(String[] args) {
-		// TODO Auto-generated method stub
-
+	public static void main(String[] args) throws Exception {
+    Options options = new Options();
+    options.parseOptions(args);
+	  List<File> trainItems = getFiles(options.traingDirectory);
+	  List<File> testItems = getFiles(options.testDirectory);
+	  
+	  EvaluationOfCoreferencePairs eval = new EvaluationOfCoreferencePairs(new File("target/models/"));
+	  AnnotationStatistics<String> stats = eval.trainAndTest(trainItems, testItems);
+	  System.err.println(stats);
 	}
+
+  private static List<File> getFiles(File directory) {
+    File[] files = directory.listFiles();
+    return Arrays.asList(files);
+  }
 }



Mime
View raw message