ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From dlig...@apache.org
Subject svn commit: r1560816 - /ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/wsd/pipelines/Preprocess.java
Date Thu, 23 Jan 2014 21:32:28 GMT
Author: dligach
Date: Thu Jan 23 21:32:27 2014
New Revision: 1560816

URL: http://svn.apache.org/r1560816
Log:
Got the preprocessing pipeline to load gold annotations from SHARP data using the SHARP Knowtator reader.

Modified:
    ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/wsd/pipelines/Preprocess.java

Modified: ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/wsd/pipelines/Preprocess.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/wsd/pipelines/Preprocess.java?rev=1560816&r1=1560815&r2=1560816&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/wsd/pipelines/Preprocess.java	(original)
+++ ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/wsd/pipelines/Preprocess.java	Thu Jan 23 21:32:27 2014
@@ -13,6 +13,7 @@ import org.apache.ctakes.chunker.ae.Defa
 import org.apache.ctakes.chunker.ae.adjuster.ChunkAdjuster;
 import org.apache.ctakes.contexttokenizer.ae.ContextDependentTokenizerAnnotator;
 import org.apache.ctakes.core.ae.OverlapAnnotator;
+import org.apache.ctakes.core.ae.SHARPKnowtatorXMLReader;
 import org.apache.ctakes.core.ae.SentenceDetector;
 import org.apache.ctakes.core.ae.SimpleSegmentAnnotator;
 import org.apache.ctakes.core.ae.TokenizerAnnotatorPTB;
@@ -26,6 +27,7 @@ import org.apache.ctakes.dictionary.look
 import org.apache.ctakes.lvg.ae.LvgAnnotator;
 import org.apache.ctakes.lvg.resource.LvgCmdApiResourceImpl;
 import org.apache.ctakes.postagger.POSTagger;
+import org.apache.ctakes.relationextractor.eval.SHARPXMI.DocumentIDAnnotator;
 import org.apache.ctakes.typesystem.type.syntax.BaseToken;
 import org.apache.ctakes.typesystem.type.syntax.Chunk;
 import org.apache.ctakes.typesystem.type.textspan.LookupWindowAnnotation;
@@ -62,7 +64,7 @@ public class Preprocess {
 
   public static final String GOLD_VIEW_NAME = "GoldView";
 
-  public static File inputDirectory = new File("/Users/dima/Boston/Data/Sharp/Cloud/sharp/text/train/");
+  public static File inputDirectory = new File("/Users/Dima/Boston/Data/Sharp/SeedCorpus/Text/");
   public static String outputDirectory = "/Users/Dima/Temp/";
 
   public static void main(String[] args) throws Exception {
@@ -94,22 +96,16 @@ public class Preprocess {
         CAS.NAME_DEFAULT_SOFA,
         ViewTextCopierAnnotator.PARAM_DESTINATION_VIEW_NAME,
         GOLD_VIEW_NAME));
-
-//    switch (this.xmlFormat) {
-//    case Anafora:
-//      aggregateBuilder.add(
-//          THYMEAnaforaXMLReader.getDescription(this.xmlDirectory),
-//          CAS.NAME_DEFAULT_SOFA,
-//          GOLD_VIEW_NAME);
-//      break;
-//    case Knowtator:
-//      aggregateBuilder.add(
-//          THYMEKnowtatorXMLReader.getDescription(this.xmlDirectory),
-//          CAS.NAME_DEFAULT_SOFA,
-//          GOLD_VIEW_NAME);
-//      break;
-//    }
-
+    // need document ids so that SHARP XML reader can figure out the path to xml files
+    aggregateBuilder.add(
+        AnalysisEngineFactory.createPrimitiveDescription(DocumentIDAnnotator.class),
+        CAS.NAME_DEFAULT_SOFA,
+        GOLD_VIEW_NAME);
+    aggregateBuilder.add(
+        AnalysisEngineFactory.createPrimitiveDescription(SHARPKnowtatorXMLReader.class),
+        CAS.NAME_DEFAULT_SOFA,
+        GOLD_VIEW_NAME);
+    
     // identify segments
     aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(SimpleSegmentAnnotator.class));
     // identify sentences



Mime
View raw message