ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From tm...@apache.org
Subject svn commit: r1736654 - /ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/data/analysis/PrintNonrelatedEvents.java
Date Fri, 25 Mar 2016 20:28:54 GMT
Author: tmill
Date: Fri Mar 25 20:28:54 2016
New Revision: 1736654

URL: http://svn.apache.org/viewvc?rev=1736654&view=rev
Log:
Added script for printing out events not involved in any relation.

Added:
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/data/analysis/PrintNonrelatedEvents.java

Added: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/data/analysis/PrintNonrelatedEvents.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/data/analysis/PrintNonrelatedEvents.java?rev=1736654&view=auto
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/data/analysis/PrintNonrelatedEvents.java (added)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/data/analysis/PrintNonrelatedEvents.java Fri Mar 25 20:28:54 2016
@@ -0,0 +1,150 @@
+package org.apache.ctakes.temporal.data.analysis;
+
+import java.io.File;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.ctakes.temporal.eval.CommandLine;
+import org.apache.ctakes.temporal.eval.Evaluation_ImplBase.XMIReader;
+import org.apache.ctakes.temporal.eval.THYMEData;
+import org.apache.ctakes.typesystem.type.relation.RelationArgument;
+import org.apache.ctakes.typesystem.type.textsem.EventMention;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.CASException;
+import org.apache.uima.collection.CollectionReader;
+import org.apache.uima.fit.factory.AggregateBuilder;
+import org.apache.uima.fit.factory.AnalysisEngineFactory;
+import org.apache.uima.fit.pipeline.JCasIterator;
+import org.apache.uima.fit.util.JCasUtil;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.cleartk.util.ViewUriUtil;
+import org.cleartk.util.ae.UriToDocumentTextAnnotator;
+import org.cleartk.util.cr.UriCollectionReader;
+
+import com.lexicalscope.jewel.cli.CliFactory;
+import com.lexicalscope.jewel.cli.Option;
+
+/**
+ * Command-line tool that reports gold-view events that take part in no relation.
+ *
+ * <p>For each processed document it prints every {@link EventMention} in the gold view that is
+ * not the argument of any {@link RelationArgument}, then per-document and corpus-wide counts.
+ */
+public class PrintNonrelatedEvents {
+  /** Name of the CAS view whose events and relation arguments are inspected. */
+  private static final String GOLD_VIEW_NAME = "GoldView";
+
+  /**
+   * Documents whose URI contains this substring are skipped entirely.
+   * NOTE(review): presumably this excludes pathology notes -- confirm against the corpus naming.
+   */
+  private static final String SKIPPED_URI_MARKER = "path";
+
+  /** Command-line options, parsed by JewelCLI from the long option names below. */
+  static interface Options {
+    /** Directory handed to {@link XMIReader} as its XMI directory ({@code --xmi}). */
+    @Option(longName = "xmi")
+    public File getXMIDirectory();
+
+    /** Patient sets to include ({@code --patients}). */
+    @Option(longName = "patients")
+    public CommandLine.IntegerRanges getPatients();
+
+    /** Directory from which the raw text files are resolved ({@code --text}). */
+    @Option(longName = "text")
+    public File getRawTextDirectory();
+  }
+
+  /**
+   * Runs the report and writes it to standard output.
+   *
+   * @param args command-line arguments; see {@link Options}
+   * @throws ResourceInitializationException if the reader or the aggregate engine cannot be created
+   * @throws CASException if the gold view cannot be obtained from a document's CAS
+   * @throws AnalysisEngineProcessException declared for failures while running the pipeline
+   */
+  public static void main(String[] args)
+      throws ResourceInitializationException, CASException, AnalysisEngineProcessException {
+    Options options = CliFactory.parseArguments(Options.class, args);
+    List<Integer> patientSets = options.getPatients().getList();
+    List<Integer> trainItems = THYMEData.getPatientSets(patientSets, THYMEData.TRAIN_REMAINDERS);
+    List<File> files = THYMEData.getFilesFor(trainItems, options.getRawTextDirectory());
+
+    CollectionReader reader = UriCollectionReader.getCollectionReaderFromFiles(files);
+    AggregateBuilder aggregateBuilder = new AggregateBuilder();
+    aggregateBuilder.add(UriToDocumentTextAnnotator.getDescription());
+    aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(
+        XMIReader.class,
+        XMIReader.PARAM_XMI_DIRECTORY,
+        options.getXMIDirectory()));
+
+    int totalNumEvents = 0;
+    int totalUnrelatedEvents = 0;
+
+    for (Iterator<JCas> casIter = new JCasIterator(reader, aggregateBuilder.createAggregate());
+        casIter.hasNext();) {
+      JCas jCas = casIter.next();
+
+      // Decide whether to skip before touching the gold view, so skipped documents
+      // never need to provide one.
+      String docUri = ViewUriUtil.getURI(jCas).toString();
+      if (docUri.contains(SKIPPED_URI_MARKER)) {
+        continue;
+      }
+
+      System.out.println("Processing note " + docUri);
+      JCas goldView = jCas.getView(GOLD_VIEW_NAME);
+      Set<Annotation> relatedAnnotations = collectRelationArguments(goldView);
+
+      int docNumEvents = 0;
+      int docUnrelatedEvents = 0;
+      for (EventMention goldEvent : JCasUtil.select(goldView, EventMention.class)) {
+        docNumEvents++;
+        if (relatedAnnotations.contains(goldEvent)) {
+          continue;
+        }
+        docUnrelatedEvents++;
+        System.out.println(String.format(
+            "Event at span (%d, %d) with text %s and doctimerel=%s"
+                + " is not involved in any relation.",
+            goldEvent.getBegin(),
+            goldEvent.getEnd(),
+            goldEvent.getCoveredText(),
+            docTimeRelOf(goldEvent)));
+      }
+
+      totalNumEvents += docNumEvents;
+      totalUnrelatedEvents += docUnrelatedEvents;
+      System.out.println(String.format(
+          "This document had %d total events, %d of which were not related to anything",
+          docNumEvents, docUnrelatedEvents));
+    }
+    System.out.println(String.format(
+        "This corpus had %d total events, %d of which were not related to anything",
+        totalNumEvents, totalUnrelatedEvents));
+  }
+
+  /** Collects the annotation referenced by every {@link RelationArgument} in the given view. */
+  private static Set<Annotation> collectRelationArguments(JCas view) {
+    Set<Annotation> arguments = new HashSet<>();
+    for (RelationArgument arg : JCasUtil.select(view, RelationArgument.class)) {
+      arguments.add(arg.getArgument());
+    }
+    return arguments;
+  }
+
+  /**
+   * Returns the event's DocTimeRel, or {@code null} (printed as "null") when the mention has no
+   * event or the event has no properties, instead of failing with a NullPointerException.
+   */
+  private static String docTimeRelOf(EventMention mention) {
+    if (mention.getEvent() == null || mention.getEvent().getProperties() == null) {
+      return null;
+    }
+    return mention.getEvent().getProperties().getDocTimeRel();
+  }
+}



Mime
View raw message