ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From dlig...@apache.org
Subject svn commit: r1746592 - /ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/pipelines/PositiveAndNegativeExamplePrinter.java
Date Thu, 02 Jun 2016 15:35:29 GMT
Author: dligach
Date: Thu Jun  2 15:35:29 2016
New Revision: 1746592

URL: http://svn.apache.org/viewvc?rev=1746592&view=rev
Log:
train and dev data now written to file instead of to stdout

Modified:
    ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/pipelines/PositiveAndNegativeExamplePrinter.java

Modified: ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/pipelines/PositiveAndNegativeExamplePrinter.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/pipelines/PositiveAndNegativeExamplePrinter.java?rev=1746592&r1=1746591&r2=1746592&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/pipelines/PositiveAndNegativeExamplePrinter.java (original)
+++ ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/pipelines/PositiveAndNegativeExamplePrinter.java Thu Jun  2 15:35:29 2016
@@ -19,13 +19,17 @@
 package org.apache.ctakes.pipelines;
 
 import java.io.File;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.nio.file.StandardOpenOption;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 
-import org.apache.ctakes.relationextractor.ae.RelationExtractorAnnotator.IdentifiedAnnotationPair;
 import org.apache.ctakes.temporal.duration.Utils;
 import org.apache.ctakes.temporal.eval.CommandLine;
 import org.apache.ctakes.temporal.eval.THYMEData;
@@ -38,6 +42,7 @@ import org.apache.uima.analysis_engine.A
 import org.apache.uima.cas.CASException;
 import org.apache.uima.collection.CollectionReader;
 import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
+import org.apache.uima.fit.descriptor.ConfigurationParameter;
 import org.apache.uima.fit.factory.AnalysisEngineFactory;
 import org.apache.uima.fit.pipeline.SimplePipeline;
 import org.apache.uima.fit.util.JCasUtil;
@@ -54,6 +59,9 @@ import com.lexicalscope.jewel.cli.Option
  */
 public class PositiveAndNegativeExamplePrinter {
 
+  public static final String trainDataFile = "/Users/Dima/Temp/train.txt";
+  public static final String devDataFile = "/Users/Dima/Temp/dev.txt";
+  
   static interface Options {
 
     @Option(longName = "xmi-dir")
@@ -64,18 +72,31 @@ public class PositiveAndNegativeExampleP
   }
 
   public static void main(String[] args) throws Exception {
-
+    
     Options options = CliFactory.parseArguments(Options.class, args);
 
     List<Integer> patientSets = options.getPatients().getList();
     List<Integer> trainItems = THYMEData.getPatientSets(patientSets, THYMEData.TRAIN_REMAINDERS);
     List<Integer> devItems = THYMEData.getPatientSets(patientSets, THYMEData.DEV_REMAINDERS);
+    
     List<File> trainFiles = Utils.getFilesFor(trainItems, options.getInputDirectory());
     List<File> devFiles = Utils.getFilesFor(devItems, options.getInputDirectory());
     
-    CollectionReader collectionReader = Utils.getCollectionReader(devFiles);
-    AnalysisEngine annotationConsumer = AnalysisEngineFactory.createEngine(RelationSnippetPrinter.class);
-    SimplePipeline.runPipeline(collectionReader, annotationConsumer);
+    // write training data to file
+    CollectionReader trainCollectionReader = Utils.getCollectionReader(trainFiles);
+    AnalysisEngine trainDataWriter = AnalysisEngineFactory.createEngine(
+        RelationSnippetPrinter.class,
+        "OutputFile",
+        trainDataFile);
+    SimplePipeline.runPipeline(trainCollectionReader, trainDataWriter);
+    
+    // write dev data to file
+    CollectionReader devCollectionReader = Utils.getCollectionReader(devFiles);
+    AnalysisEngine devDataWriter = AnalysisEngineFactory.createEngine(
+        RelationSnippetPrinter.class,
+        "OutputFile",
+        devDataFile);
+    SimplePipeline.runPipeline(devCollectionReader, devDataWriter);
   }
 
   /**
@@ -85,6 +106,12 @@ public class PositiveAndNegativeExampleP
    */
   public static class RelationSnippetPrinter extends JCasAnnotator_ImplBase {
 
+    @ConfigurationParameter(
+        name = "OutputFile",
+        mandatory = true,
+        description = "path to the output file")
+    private String outputFile;
+    
     @Override
     public void process(JCas jCas) throws AnalysisEngineProcessException {
 
@@ -123,7 +150,7 @@ public class PositiveAndNegativeExampleP
             }
             BinaryTextRelation relation = relationLookup.get(Arrays.asList(mention1, mention2));
             if(mention1.getBegin() > mention2.getBegin()) {
-              continue; // will worry about this later  
+              continue; // will worry about these later  
             }
             
             String label;
@@ -134,14 +161,19 @@ public class PositiveAndNegativeExampleP
             }
 
             String context = getTextBetween(systemView, mention1, mention2); 
-            String text = String.format("%s|%s", label, context);
+            String text = String.format("%s|%s\n", label, context);
             eventEventRelationsInSentence.add(text.toLowerCase());
           }
         }
 
         if(eventEventRelationsInSentence.size() > 0) {
           for(String text : eventEventRelationsInSentence) {
-            System.out.println(text);
+            Path path = Paths.get(outputFile);
+            try {
+              Files.write(path, text.getBytes(), StandardOpenOption.APPEND);
+            } catch (IOException e) {
+              throw new AnalysisEngineProcessException(e);
+            }
           }
         }
       }
@@ -150,16 +182,18 @@ public class PositiveAndNegativeExampleP
 
   public static String getTextBetween(JCas jCas, Annotation arg1, Annotation arg2)  {
     
+    // todo: use 2 tokens for things like 'during those hospitalizations ...'
+    // todo: ensure previous tokens are in the same sentence
     StringBuilder tokens = new StringBuilder();
     List<BaseToken> preceeding = JCasUtil.selectPreceding(jCas, BaseToken.class, arg1, 1);
     tokens.append(preceeding.get(0).getCoveredText() + " ");
     
     List<BaseToken> baseTokens = JCasUtil.selectBetween(jCas, BaseToken.class, arg1, arg2);
-    tokens.append("<arg1-start> " + arg1.getCoveredText() + " <arg1-end> ");
+    tokens.append("" + arg1.getCoveredText() + " ");
     for(BaseToken baseToken : baseTokens) {
       tokens.append(baseToken.getCoveredText() + " ");
     }
-    tokens.append("<arg2-start> " + arg2.getCoveredText() + " <arg2-end> ");
+    tokens.append("" + arg2.getCoveredText() + " ");
 
     List<BaseToken> following = JCasUtil.selectFollowing(jCas, BaseToken.class, arg1, 1);
     tokens.append(following.get(0).getCoveredText());



Mime
View raw message