ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From dlig...@apache.org
Subject svn commit: r1743091 - in /ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes: consumers/GoldRelationPrinter.java pipelines/PositiveAndNegativeExamplePrinter.java
Date Tue, 10 May 2016 00:49:56 GMT
Author: dligach
Date: Tue May 10 00:49:56 2016
New Revision: 1743091

URL: http://svn.apache.org/viewvc?rev=1743091&view=rev
Log:
Data dump for CNN experiments

Added:
    ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/consumers/GoldRelationPrinter.java
    ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/pipelines/PositiveAndNegativeExamplePrinter.java

Added: ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/consumers/GoldRelationPrinter.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/consumers/GoldRelationPrinter.java?rev=1743091&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/consumers/GoldRelationPrinter.java
(added)
+++ ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/consumers/GoldRelationPrinter.java
Tue May 10 00:49:56 2016
@@ -0,0 +1,106 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.consumers;
+
+import java.io.File;
+import java.util.List;
+
+import org.apache.ctakes.typesystem.type.relation.BinaryTextRelation;
+import org.apache.ctakes.typesystem.type.syntax.BaseToken;
+import org.apache.ctakes.typesystem.type.textsem.EventMention;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.ctakes.utils.Utils;
+import org.apache.uima.analysis_engine.AnalysisEngine;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.CASException;
+import org.apache.uima.collection.CollectionReader;
+import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
+import org.apache.uima.fit.factory.AnalysisEngineFactory;
+import org.apache.uima.fit.pipeline.SimplePipeline;
+import org.apache.uima.fit.util.JCasUtil;
+import org.apache.uima.jcas.JCas;
+
+import com.lexicalscope.jewel.cli.CliFactory;
+import com.lexicalscope.jewel.cli.Option;
+
+/**
+ * Reads cTAKES annotations from XMI files and feeds each document to {@link RelationPrinter}.
+ * The printer's output statement is currently commented out, so no relation lines are printed.
+ * @author dmitriy dligach
+ */
+public class GoldRelationPrinter {
+
+  static interface Options {
+    // command-line options, parsed in main() by CliFactory.parseArguments
+    @Option(
+        longName = "xmi-dir",
+        description = "path to xmi files")
+    public File getInputDirectory();
+  }
+
+  public static void main(String[] args) throws Exception {
+    // read the --xmi-dir input and run RelationPrinter as the pipeline's only analysis engine
+    Options options = CliFactory.parseArguments(Options.class, args);
+    CollectionReader collectionReader = Utils.getCollectionReader(options.getInputDirectory());
+    AnalysisEngine annotationConsumer = AnalysisEngineFactory.createEngine(RelationPrinter.class);
+    SimplePipeline.runPipeline(collectionReader, annotationConsumer);
+  }
+
+  public static class RelationPrinter extends JCasAnnotator_ImplBase {
+    // iterates over the BinaryTextRelations of the "GoldView" view of each CAS
+    @Override
+    public void process(JCas jCas) throws AnalysisEngineProcessException {
+      // a view that cannot be retrieved aborts processing: CASException is rethrown wrapped
+      JCas goldView;
+      try {
+        goldView = jCas.getView("GoldView");
+      } catch (CASException e) {
+        throw new AnalysisEngineProcessException(e);
+      }
+
+      JCas systemView; // only referenced by the commented-out sentence lookup below
+      try {
+        systemView = jCas.getView("_InitialView");
+      } catch (CASException e) {
+        throw new AnalysisEngineProcessException(e);
+      }
+
+      for(BinaryTextRelation binaryTextRelation : JCasUtil.select(goldView, BinaryTextRelation.class))
{
+        // the casts assume both relation arguments are IdentifiedAnnotations
+        IdentifiedAnnotation entity1 = (IdentifiedAnnotation) binaryTextRelation.getArg1().getArgument();
+        IdentifiedAnnotation entity2 = (IdentifiedAnnotation) binaryTextRelation.getArg2().getArgument();
+
+        String category = binaryTextRelation.getCategory();
+
+        String arg1 = entity1.getCoveredText().toLowerCase();
+        String arg2 = entity2.getCoveredText().toLowerCase();
+        // NOTE: category, arg1 and arg2 are consumed only by the disabled print statement below
+//        List<Sentence> enclosingSentences = JCasUtil.selectCovering(
+//            systemView, 
+//            Sentence.class,
+//            entity1.getBegin(), 
+//            entity2.getEnd());
+
+        // System.out.format("%s|%s|%s|%s\n", category, arg1, arg2, enclosingSentences.get(0).getCoveredText());
+      }
+    }
+  }
+}

Added: ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/pipelines/PositiveAndNegativeExamplePrinter.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/pipelines/PositiveAndNegativeExamplePrinter.java?rev=1743091&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/pipelines/PositiveAndNegativeExamplePrinter.java
(added)
+++ ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/pipelines/PositiveAndNegativeExamplePrinter.java
Tue May 10 00:49:56 2016
@@ -0,0 +1,196 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.pipelines;
+
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.ctakes.temporal.duration.Utils;
+import org.apache.ctakes.temporal.eval.CommandLine;
+import org.apache.ctakes.temporal.eval.THYMEData;
+import org.apache.ctakes.typesystem.type.relation.BinaryTextRelation;
+import org.apache.ctakes.typesystem.type.textsem.EventMention;
+import org.apache.ctakes.typesystem.type.textsem.TimeMention;
+import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.uima.analysis_engine.AnalysisEngine;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.CASException;
+import org.apache.uima.collection.CollectionReader;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.cleartk.util.ViewUriUtil;
+import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
+import org.apache.uima.fit.factory.AnalysisEngineFactory;
+import org.apache.uima.fit.pipeline.SimplePipeline;
+import org.apache.uima.fit.util.JCasUtil;
+
+import com.lexicalscope.jewel.cli.CliFactory;
+import com.lexicalscope.jewel.cli.Option;
+
+/**
+ * Prints one line per ordered pair of distinct gold event mentions that share a sentence.
+ * Line format: label|arg1 text|arg2 text|context; label is 1 or -1 for CONTAINS, else 0.
+ * @author dmitriy dligach
+ */
+public class PositiveAndNegativeExamplePrinter {
+
+  static interface Options {
+    // command-line options, parsed in main() by CliFactory.parseArguments
+    @Option(longName = "xmi-dir")
+    public File getInputDirectory();
+
+    @Option(longName = "patients")
+    public CommandLine.IntegerRanges getPatients();
+  }
+
+  public static void main(String[] args) throws Exception {
+    // pick the files selected by THYMEData.TRAIN_REMAINDERS for the given patient sets, then run the printer
+    Options options = CliFactory.parseArguments(Options.class, args);
+
+    List<Integer> patientSets = options.getPatients().getList();
+    List<Integer> trainItems = THYMEData.getPatientSets(patientSets, THYMEData.TRAIN_REMAINDERS);
+    List<File> trainFiles = Utils.getFilesFor(trainItems, options.getInputDirectory());
+    CollectionReader collectionReader = Utils.getCollectionReader(trainFiles);
+
+    AnalysisEngine annotationConsumer = AnalysisEngineFactory.createEngine(
+        RelationContextPrinter.class);
+
+    SimplePipeline.runPipeline(collectionReader, annotationConsumer);
+  }
+
+  /**
+   * For each sentence of the system view, pairs up the gold event mentions it covers and
+   * prints the labeled pairs; event-time relations are looked up but not added to the output.
+   * @author dmitriy dligach
+   */
+  public static class RelationContextPrinter extends JCasAnnotator_ImplBase {
+
+    @Override
+    public void process(JCas jCas) throws AnalysisEngineProcessException {
+      // a view that cannot be retrieved aborts processing: CASException is rethrown wrapped
+      JCas goldView;
+      try {
+        goldView = jCas.getView("GoldView");
+      } catch (CASException e) {
+        throw new AnalysisEngineProcessException(e);
+      }
+
+      JCas systemView;
+      try {
+        systemView = jCas.getView("_InitialView");
+      } catch (CASException e) {
+        throw new AnalysisEngineProcessException(e);
+      }
+
+      // can't iterate over binary text relations in a sentence, so need
+      // a lookup from pair of annotations to binary text relation
+      Map<List<Annotation>, BinaryTextRelation> relationLookup = new HashMap<>();
+      for(BinaryTextRelation relation : JCasUtil.select(goldView, BinaryTextRelation.class))
{
+        Annotation arg1 = relation.getArg1().getArgument();
+        Annotation arg2 = relation.getArg2().getArgument();
+        relationLookup.put(Arrays.asList(arg1, arg2), relation);
+      }
+
+      File noteFile = new File(ViewUriUtil.getURI(jCas).toString());
+      String fileName = noteFile.getName(); // only used by the commented-out header print further down
+
+      for(Sentence sentence : JCasUtil.select(systemView, Sentence.class)) {
+        List<String> formattedRelationsInSentence = new ArrayList<>();
+        List<EventMention> eventMentionsInSentence = JCasUtil.selectCovered(goldView,
EventMention.class, sentence);
+        List<TimeMention> timeMentionsInSentence = JCasUtil.selectCovered(goldView,
TimeMention.class, sentence);
+
+        // retrieve event-time relations in this sentence (formatted, but the add() calls are commented out)
+        for(EventMention eventMention : eventMentionsInSentence) {
+          for(TimeMention timeMention : timeMentionsInSentence) {
+            // there are relations where arg1=time and arg2=event
+            BinaryTextRelation timeEventRel = relationLookup.get(Arrays.asList(timeMention,
eventMention));
+            if(timeEventRel != null) {
+              String text = String.format("%s(%s, %s)", timeEventRel.getCategory(), timeMention.getCoveredText(),
eventMention.getCoveredText());
+              // formattedRelationsInSentence.add(text);
+            } 
+            // and relations where arg1=event and arg2=time
+            BinaryTextRelation eventTimeRel = relationLookup.get(Arrays.asList(eventMention,
timeMention));
+            if(eventTimeRel != null) {
+              String text = String.format("%s(%s, %s)", eventTimeRel.getCategory(), eventMention.getCoveredText(),
timeMention.getCoveredText());
+              // formattedRelationsInSentence.add(text);
+            }
+          }
+        }
+        // TODO: in sentence or in entire document?
+        // retrieve event-event relations in this sentence
+        for(EventMention mention1 : eventMentionsInSentence) {
+          for(EventMention mention2 : eventMentionsInSentence) {
+            if(mention1 == mention2) { // identity check: never pair a mention with itself
+              continue;
+            }
+            BinaryTextRelation relation = relationLookup.get(Arrays.asList(mention1, mention2));
+            // every ordered pair produces an output line, including pairs with no gold relation
+            String label;
+            if(relation == null) {
+              label = "0";    // no relation
+            } else if(relation.getCategory().equals("CONTAINS")) {
+              if(mention1.getBegin() < mention2.getBegin()) {
+                label = "1";  // forward relation
+              } else { 
+                label = "-1"; // reverse relation
+              }
+            } else {    
+              label = "0";    // non-contains relation
+            }
+            
+            String context = getTextBetweenAnnotations(systemView, mention1, mention2); 
+            String text = String.format("%s|%s|%s|%s", 
+                label,
+                mention1.getCoveredText(), 
+                mention2.getCoveredText(), 
+                context);
+            formattedRelationsInSentence.add(text);
+          }
+        }
+
+        if(formattedRelationsInSentence.size() > 0) {
+          // System.out.println(fileName + ": " + sentence.getCoveredText());
+          for(String text : formattedRelationsInSentence) {
+            System.out.println(text);
+          }
+        }
+      }
+    }
+  }
+  // Returns the document text from the start of the leftmost argument to the end of the rightmost one.
+  private static String getTextBetweenAnnotations(JCas jCas, Annotation arg1, Annotation
arg2) {
+    // the returned span includes both arguments themselves; each \r or \n becomes a space
+    final int windowSize = 0; // extra characters of context on each side of the span; 0 adds none
+
+    String text = jCas.getDocumentText();
+    int leftArgBegin = Math.min(arg1.getBegin(), arg2.getBegin());
+    int rightArgEnd = Math.max(arg1.getEnd(), arg2.getEnd());
+    int begin = Math.max(0, leftArgBegin - windowSize);
+    int end = Math.min(text.length(), rightArgEnd + windowSize); 
+
+    return text.substring(begin, end).replaceAll("[\r\n]", " ");
+  }
+}



Mime
View raw message