ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From seanfi...@apache.org
Subject svn commit: r1660963 [19/19] - in /ctakes/sandbox/timelanes: META-INF/ edu/ edu/mayo/ edu/mayo/bmi/ edu/mayo/bmi/annotation/ edu/mayo/bmi/annotation/knowtator/ org/ org/chboston/ org/chboston/cnlp/ org/chboston/cnlp/anafora/ org/chboston/cnlp/anafora/a...
Date Thu, 19 Feb 2015 18:06:17 GMT
Added: ctakes/sandbox/timelanes/org/chboston/cnlp/timeline/timespan/plus/TimeSpanPlus.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/timelanes/org/chboston/cnlp/timeline/timespan/plus/TimeSpanPlus.java?rev=1660963&view=auto
==============================================================================
--- ctakes/sandbox/timelanes/org/chboston/cnlp/timeline/timespan/plus/TimeSpanPlus.java (added)
+++ ctakes/sandbox/timelanes/org/chboston/cnlp/timeline/timespan/plus/TimeSpanPlus.java Thu Feb 19 18:06:13 2015
@@ -0,0 +1,181 @@
+package org.chboston.cnlp.timeline.timespan.plus;
+
+import net.jcip.annotations.Immutable;
+import org.chboston.cnlp.timeline.timespan.AbstractTimeSpan;
+import org.chboston.cnlp.timeline.timespan.EndPointer;
+
+/**
+ * Time span bounded by two {@link TimeEndPoint}s.  Each end point supplies a millisecond value,
+ * a fuzziness flag and an {@link EndPointer}; the pointer pair drives {@link #getRelationText()}.
+ * <p>
+ * Author: SPF
+ * Affiliation: CHIP-NLP
+ * Date: 7/30/13
+ */
+@Immutable
+final public class TimeSpanPlus extends AbstractTimeSpan implements PointedTimeSpan {
+
+   /**
+    * Span built from {@link TimeEndPoint#NULL_END_POINT} at both ends; its relation text is "Unknown".
+    */
+   static public final TimeSpanPlus UNKNOWN_TIMESPAN_PLUS
+         = new TimeSpanPlus( TimeEndPoint.NULL_END_POINT, TimeEndPoint.NULL_END_POINT );
+
+
+   final private TimeEndPoint _startTime;
+   final private TimeEndPoint _stopTime;
+
+   /**
+    * Creates a span from the given end points.  When the start lies after the stop (by milliseconds)
+    * the two are swapped and both resulting end points are given the {@code OVERLAP} pointer.
+    *
+    * @param startTime end point at which the span begins
+    * @param stopTime  end point at which the span ends
+    */
+   public TimeSpanPlus( final TimeEndPoint startTime, final TimeEndPoint stopTime ) {
+      final boolean inOrder = startTime.getMillis() <= stopTime.getMillis();
+      if ( inOrder ) {
+         _startTime = startTime;
+         _stopTime = stopTime;
+      } else {
+         // Hopefully this never happens ...
+         _startTime = new TimeEndPoint( EndPointer.OVERLAP, stopTime.getMillis(), stopTime.isFuzzy() );
+         _stopTime = new TimeEndPoint( EndPointer.OVERLAP, startTime.getMillis(), startTime.isFuzzy() );
+      }
+   }
+
+   /**
+    * @return the end point at which this span begins
+    */
+   public TimeEndPoint getStartTime() {
+      return _startTime;
+   }
+
+   /**
+    * @return the end point at which this span ends
+    */
+   public TimeEndPoint getStopTime() {
+      return _stopTime;
+   }
+
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public long getStartMillis() {
+      return _startTime.getMillis();
+   }
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public long getStopMillis() {
+      return _stopTime.getMillis();
+   }
+
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public boolean isFuzzyDate() {
+      if ( _startTime.isFuzzy() ) {
+         return true;
+      }
+      return _stopTime.isFuzzy();
+   }
+
+   /**
+    * Describes how this span relates to its dates, based on the start and stop pointers.
+    * Single-date spans get a dedicated wording for four pointer pairs; every other case uses
+    * the general start/stop wording.
+    *
+    * @return "Unknown" for {@link #UNKNOWN_TIMESPAN_PLUS}, otherwise the text for the pointer pair,
+    * or null when the pointer pair has no associated text
+    */
+   public String getRelationText() {
+      if ( this.equals( UNKNOWN_TIMESPAN_PLUS ) ) {
+         return "Unknown";
+      }
+      final EndPointer begin = _startTime.getPointer();
+      final EndPointer end = _stopTime.getPointer();
+      if ( isSingleDate() ) {
+         final String singleDateText = describeSingleDate( begin, end );
+         if ( singleDateText != null ) {
+            return singleDateText;
+         }
+      }
+      return describeRange( begin, end );
+   }
+
+   /**
+    * @param begin pointer of the start end point
+    * @param end   pointer of the stop end point
+    * @return wording reserved for single-date spans, or null if the pair has no special wording
+    */
+   static private String describeSingleDate( final EndPointer begin, final EndPointer end ) {
+      if ( begin == EndPointer.BEFORE ) {
+         if ( end == EndPointer.BEFORE ) {
+            return "Occurs Before";
+         }
+         if ( end == EndPointer.EQUAL ) {
+            return "Ends on";
+         }
+         return null;
+      }
+      if ( begin == EndPointer.AFTER && end == EndPointer.AFTER ) {
+         return "Occurs After";
+      }
+      if ( begin == EndPointer.EQUAL && end == EndPointer.AFTER ) {
+         return "Begins on";
+      }
+      return null;
+   }
+
+   /**
+    * @param begin pointer of the start end point
+    * @param end   pointer of the stop end point
+    * @return general wording for the pointer pair, or null if either pointer is not one of
+    * BEFORE, AFTER, EQUAL, OVERLAP
+    */
+   static private String describeRange( final EndPointer begin, final EndPointer end ) {
+      if ( begin == EndPointer.BEFORE ) {
+         return selectByStop( end, "Starts before, ends within", "Starts before, ends with",
+               "Starts before, overlaps", "Starts before, ends after" );
+      }
+      if ( begin == EndPointer.AFTER ) {
+         return selectByStop( end, "Is Within", "Starts within, ends with",
+               "Starts within, overlaps", "Starts within, ends after" );
+      }
+      if ( begin == EndPointer.EQUAL ) {
+         return selectByStop( end, "Starts with, ends before", "Starts with, ends with",
+               "Starts with, overlaps", "Starts with, ends after" );
+      }
+      if ( begin == EndPointer.OVERLAP ) {
+         return selectByStop( end, "Overlaps, ends within", "Overlaps, ends with",
+               "Overlaps", "Overlaps, ends after" );
+      }
+      return null;
+   }
+
+   /**
+    * Picks one of four texts according to the stop pointer.
+    *
+    * @param end         pointer of the stop end point
+    * @param beforeText  text used when the stop pointer is BEFORE
+    * @param equalText   text used when the stop pointer is EQUAL
+    * @param overlapText text used when the stop pointer is OVERLAP
+    * @param afterText   text used when the stop pointer is AFTER
+    * @return the matching text, or null for any other pointer
+    */
+   static private String selectByStop( final EndPointer end, final String beforeText, final String equalText,
+                                       final String overlapText, final String afterText ) {
+      if ( end == EndPointer.BEFORE ) {
+         return beforeText;
+      }
+      if ( end == EndPointer.EQUAL ) {
+         return equalText;
+      }
+      if ( end == EndPointer.OVERLAP ) {
+         return overlapText;
+      }
+      if ( end == EndPointer.AFTER ) {
+         return afterText;
+      }
+      return null;
+   }
+
+   /**
+    * @return an empty string for {@link #UNKNOWN_TIMESPAN_PLUS}, otherwise the superclass text for this span
+    */
+   public String getSpanText() {
+      return this.equals( UNKNOWN_TIMESPAN_PLUS ) ? "" : super.toString();
+   }
+
+   /**
+    * {@inheritDoc}
+    *
+    * @return the relation text and the span text, separated by a single space
+    */
+   @Override
+   public String toString() {
+      final StringBuilder sb = new StringBuilder();
+      sb.append( getRelationText() );
+      sb.append( " " );
+      sb.append( getSpanText() );
+      return sb.toString();
+   }
+
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public int hashCode() {
+      final int startHash = _startTime.hashCode();
+      final int stopHash = _stopTime.hashCode();
+      return startHash + 3 * stopHash;
+   }
+
+   /**
+    * {@inheritDoc}
+    *
+    * @return true only for another TimeSpanPlus with equal start and stop end points
+    */
+   @Override
+   public boolean equals( final Object object ) {
+      if ( !(object instanceof TimeSpanPlus) ) {
+         return false;
+      }
+      final TimeSpanPlus other = (TimeSpanPlus)object;
+      return other._startTime.equals( _startTime ) && other._stopTime.equals( _stopTime );
+   }
+
+
+}

Added: ctakes/sandbox/timelanes/org/chboston/cnlp/timeline/timespan/plus/TimeSpanPlusComparator.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/timelanes/org/chboston/cnlp/timeline/timespan/plus/TimeSpanPlusComparator.java?rev=1660963&view=auto
==============================================================================
--- ctakes/sandbox/timelanes/org/chboston/cnlp/timeline/timespan/plus/TimeSpanPlusComparator.java (added)
+++ ctakes/sandbox/timelanes/org/chboston/cnlp/timeline/timespan/plus/TimeSpanPlusComparator.java Thu Feb 19 18:06:13 2015
@@ -0,0 +1,56 @@
+package org.chboston.cnlp.timeline.timespan.plus;
+
+
+import java.util.Comparator;
+
+/**
+ * Singleton comparator that orders {@link PointedTimeSpan}s by their start end point and,
+ * when the starts compare equal, by their stop end point.
+ * <p>
+ * Author: SPF
+ * Affiliation: CHIP-NLP
+ * Date: 8/2/13
+ */
+public enum TimeSpanPlusComparator implements Comparator<PointedTimeSpan> {
+   INSTANCE;
+
+   /**
+    * @return the single comparator instance
+    */
+   static public TimeSpanPlusComparator getInstance() {
+      return INSTANCE;
+   }
+
+   /**
+    * {@inheritDoc}
+    *
+    * @return the comparison of the start end points, or of the stop end points if the starts are equal
+    */
+   @Override
+   public int compare( final PointedTimeSpan timeSpan1, final PointedTimeSpan timeSpan2 ) {
+      final Comparator<TimeEndPoint> endPointComparator = TimeEndPointComparator.getInstance();
+      final int startComparison = endPointComparator.compare( timeSpan1.getStartTime(),
+            timeSpan2.getStartTime() );
+      if ( startComparison != 0 ) {
+         return startComparison;
+      }
+      return endPointComparator.compare( timeSpan1.getStopTime(), timeSpan2.getStopTime() );
+   }
+
+   /**
+    * Orders end points by millisecond value, breaking ties with the order of their pointers.
+    */
+   static private enum TimeEndPointComparator implements Comparator<TimeEndPoint> {
+      INSTANCE;
+
+      /**
+       * @return the single comparator instance
+       */
+      static public TimeEndPointComparator getInstance() {
+         return INSTANCE;
+      }
+
+      /**
+       * {@inheritDoc}
+       *
+       * @return negative, zero or positive as endPoint1 sorts before, with or after endPoint2
+       */
+      @Override
+      public int compare( final TimeEndPoint endPoint1, final TimeEndPoint endPoint2 ) {
+         final int millisComparison = Long.compare( endPoint1.getMillis(), endPoint2.getMillis() );
+         if ( millisComparison != 0 ) {
+            return millisComparison;
+         }
+         // Integer.compare instead of subtraction: a difference can overflow and flip the sign
+         return Integer.compare( endPoint1.getPointer().getOrder(), endPoint2.getPointer().getOrder() );
+      }
+
+   }
+
+
+}

Added: ctakes/sandbox/timelanes/org/chboston/cnlp/timeline/todo.txt
URL: http://svn.apache.org/viewvc/ctakes/sandbox/timelanes/org/chboston/cnlp/timeline/todo.txt?rev=1660963&view=auto
==============================================================================
--- ctakes/sandbox/timelanes/org/chboston/cnlp/timeline/todo.txt (added)
+++ ctakes/sandbox/timelanes/org/chboston/cnlp/timeline/todo.txt Thu Feb 19 18:06:13 2015
@@ -0,0 +1,27 @@
+ok 1.  UmlsEvent to use RelativeTimeSpan instead of its own TimexRel enum
+1.  Create UmlsEvent by Relation parse with RelativeTimeSpan
+2.  Modify TimeSpanRenderer Before and After
+3.  Get rid of Linked Scrollers
+4.  Add to Semantic Type collection on Search
+5.  Add remove button to left of each event lane
+
+4.  Add button to expand / collapse semantic type
+
+10. Fix update on Header for Events
+
+
+
+
+
+X?- Colors on dates for UMLS types
+X?- Add I2B2 Color Scheme
+- Get Semantic Types for Events
+- Cull / Combine by coreference
+- "Lifeline Date" header listing dates w/o overlap that change with zoom
+- "&" and "|" searches
+?- Sort by "criticality" of event
+- Link timexRel events across timespans : AFTER> - - - <BEFORE
+?- Mark Permanence of Disease
+- Improve zoom bar labels (1x 2x ... 100% 200% ...)
+- Fix VerticalMimicPanel - resize updates not being painted properly
+

Added: ctakes/sandbox/timelanes/org/chboston/cnlp/timeline/util/CuiPrinter.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/timelanes/org/chboston/cnlp/timeline/util/CuiPrinter.java?rev=1660963&view=auto
==============================================================================
--- ctakes/sandbox/timelanes/org/chboston/cnlp/timeline/util/CuiPrinter.java (added)
+++ ctakes/sandbox/timelanes/org/chboston/cnlp/timeline/util/CuiPrinter.java Thu Feb 19 18:06:13 2015
@@ -0,0 +1,97 @@
+package org.chboston.cnlp.timeline.util;
+
+import org.chboston.cnlp.anafora.annotation.parser.AnaforaXmlParser;
+import org.chboston.cnlp.nlp.annotation.annotation.store.AnnotationStore;
+import org.chboston.cnlp.nlp.annotation.attribute.DefinedAttributeType;
+import org.chboston.cnlp.nlp.annotation.entity.Entity;
+import org.chboston.cnlp.nlp.annotation.parser.AnnotationsParser;
+
+import java.io.*;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.List;
+import java.util.logging.Logger;
+
+/**
+ * Command-line utility that, for each document directory under an input parent directory,
+ * parses one UMLS-Entity Anafora xml file and writes a text listing of its named entities
+ * (span, CUI, TUI, covered text) followed by word and annotation totals.
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 12/19/2014
+ */
+final public class CuiPrinter {
+
+   static private final Logger LOGGER = Logger.getLogger( "CuiPrinter" );
+
+   // Locations used when no command-line arguments are supplied
+   static private final String DEFAULT_INPUT_PARENT_PATH
+         = "C:\\Spiffy\\prj_thyme\\data\\internal\\annotations\\release_gold\\ColonCancer";
+   static private final String DEFAULT_OUTPUT_DIR_PATH
+         = "C:\\Spiffy\\prj_thyme\\output\\temp\\release_gold_cuis";
+
+   // Placeholder written when an entity has no value for an attribute
+   static private final String UNKNOWN = "UNKNOWN";
+
+   private CuiPrinter() {}
+
+
+   /**
+    * @param value attribute value, possibly null or empty
+    * @return the value, or "UNKNOWN" if it is null or empty
+    */
+   static private String valueOrUnknown( final String value ) {
+      return ( value == null || value.isEmpty() ) ? UNKNOWN : value;
+   }
+
+
+   /**
+    * Writes one line per named entity ("start,end  CUI_TUI text") and then the total word and
+    * annotation counts.  An I/O failure is logged, not rethrown.
+    *
+    * @param outputFile      file to (over)write
+    * @param annotationStore source of the named entities and the word count
+    */
+   static private void writeCuis( final File outputFile, final AnnotationStore annotationStore ) {
+      final Collection<Entity> entities = annotationStore.getNamedEntities();
+      try ( Writer writer = new BufferedWriter( new FileWriter( outputFile ) ) ) {
+         for ( Entity entity : entities ) {
+            final String cui = valueOrUnknown( entity.getAttributeValue( DefinedAttributeType.CUI ) );
+            final String tui = valueOrUnknown( entity.getAttributeValue( DefinedAttributeType.TUI ) );
+            writer.write( entity.getTextSpan().getStartIndex() + "," + entity.getTextSpan().getEndIndex()
+                          + "  " + cui + "_" + tui + " " + entity.getSpannedText() + "\n" );
+         }
+         writer.write( "Total Words: " + annotationStore.getWordCount() + "\n");
+         writer.write( "Total Annotations: " + entities.size() + "\n" );
+      } catch (IOException ioE ) {
+         // keep the exception itself so the stack trace is not lost
+         LOGGER.log( java.util.logging.Level.SEVERE, ioE.getMessage(), ioE );
+      }
+   }
+
+
+   /**
+    * Selects the entity xml of one document directory and writes its CUI listing to
+    * {@code <xml name>.out} in the output directory.  A file ending with
+    * ".UMLS-Entity.gold.completed.xml" wins outright; otherwise the longest file whose name
+    * contains ".UMLS-Entity" is used.  Does nothing if the directory cannot be listed or holds
+    * no candidate file.
+    *
+    * @param inputDir  document directory; the document text is read from {@code <dir name>.txt} within it
+    * @param outputDir directory receiving the ".out" file
+    */
+   static private void copyGoldEntityXmls( final File inputDir, final File outputDir ) {
+      final String[] fileNames = inputDir.list();
+      if ( fileNames == null ) {
+         return;
+      }
+      File bestXml = null;
+      long longestLength = 0;
+      for ( String fileName : fileNames ) {
+         if ( fileName.endsWith( ".UMLS-Entity.gold.completed.xml" ) ) {
+            bestXml = new File( inputDir, fileName );
+            break;
+         }
+         if ( fileName.contains( ".UMLS-Entity" ) ) {
+            final File entityXml = new File( inputDir, fileName );
+            if ( entityXml.length() > longestLength ) {
+               bestXml = entityXml;
+               longestLength = entityXml.length();
+            }
+         }
+      }
+      if ( bestXml == null ) {
+         return;
+      }
+      final AnnotationsParser parser = new AnaforaXmlParser();
+      parser.setDocumentTextFile( new File( inputDir, inputDir.getName() + ".txt" ) );
+      parser.parseFile( bestXml.getPath() );
+      final AnnotationStore annotationStore = parser.getAnnotationStore();
+      final File outputFile = new File( outputDir, bestXml.getName() + ".out" );
+      writeCuis( outputFile, annotationStore );
+   }
+
+
+   /**
+    * @param args optional: args[0] is the parent directory of the document directories,
+    *             args[1] is the output directory.  Missing arguments fall back to the built-in paths.
+    */
+   public static void main( String... args ) {
+      final String inputParentPath = args.length > 0 ? args[ 0 ] : DEFAULT_INPUT_PARENT_PATH;
+      final String outputDirPath = args.length > 1 ? args[ 1 ] : DEFAULT_OUTPUT_DIR_PATH;
+      final File outputDir = new File( outputDirPath );
+
+      final File inputParentDir = new File( inputParentPath );
+      // listFiles() returns null when the path is not a readable directory
+      final File[] inputDirs = inputParentDir.listFiles();
+      if ( inputDirs == null ) {
+         LOGGER.severe( "Cannot list input directory " + inputParentDir.getPath() );
+         return;
+      }
+      // FileWriter does not create parent directories
+      if ( !outputDir.isDirectory() && !outputDir.mkdirs() ) {
+         LOGGER.severe( "Cannot create output directory " + outputDir.getPath() );
+         return;
+      }
+      for ( File inputDir : inputDirs ) {
+         copyGoldEntityXmls( inputDir, outputDir );
+      }
+   }
+
+
+}

Added: ctakes/sandbox/timelanes/org/chboston/cnlp/timeline/util/GoldSerializer.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/timelanes/org/chboston/cnlp/timeline/util/GoldSerializer.java?rev=1660963&view=auto
==============================================================================
--- ctakes/sandbox/timelanes/org/chboston/cnlp/timeline/util/GoldSerializer.java (added)
+++ ctakes/sandbox/timelanes/org/chboston/cnlp/timeline/util/GoldSerializer.java Thu Feb 19 18:06:13 2015
@@ -0,0 +1,149 @@
+package org.chboston.cnlp.timeline.util;
+
+import org.chboston.cnlp.nlp.annotation.annotation.store.AnnotationStore;
+import org.chboston.cnlp.nlp.annotation.annotation.store.AnnotationStoreFactory;
+import org.chboston.cnlp.timeline.gui.qaclipper.TimelineAnaforaWriter5;
+import org.chboston.cnlp.timeline.timeline.Timeline;
+import org.chboston.cnlp.timeline.timeline.TimelineFactory;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.util.Collection;
+import java.util.HashSet;
+
+/**
+ * Command-line utility that builds {@link Timeline}s from annotation stores and writes them
+ * with {@link TimelineAnaforaWriter5}.  Also holds helpers that arrange IAA input files into
+ * one directory per document set.  All locations are hard-coded below.
+ * <p>
+ * Author: SPF
+ * Affiliation: CHIP-NLP
+ * Date: 8/15/14
+ */
+final public class GoldSerializer {
+
+   private GoldSerializer() {
+   }
+
+   static private final File ROOT_DIR = new File( "C:/Spiffy/Data/IAA/THYME5/IaaInput_ColonCancer/a_ux" );
+   static private final File IAA_IN_DIR = new File( "C:/Spiffy/Data/IAA/THYME5/IaaInput_ColonCancer/a_gold_ux" );
+
+   static private final String SERIALIZATIONS = "C:/Spiffy/Output/Timeline/Serialized/Gold/ColonCancer";
+
+   //   static private final File ROOT_IN_DIR = new File( "C:\\Spiffy\\prj_thyme\\data\\external\\extrinsic\\ColonCancer" );
+//   static private final File ROOT_OUT_DIR = new File( "C:\\Spiffy\\prj_thyme\\output\\permanent\\extrinsic\\ColonCancer" );
+   static private final File ROOT_IN_DIR
+         = new File( "C:\\Spiffy\\prj_thyme\\data\\internal\\corpus\\colon_cancer\\processed_test" );
+   static private final File ROOT_OUT_DIR
+         = new File( "C:\\Spiffy\\prj_thyme\\output\\permanent\\extrinsic\\colon_cancer\\from_xmi" );
+
+
+   /**
+    * Serializes the timelines of the xmi files in {@code ROOT_IN_DIR} to {@code ROOT_OUT_DIR}.
+    * The commented-out calls are the other available steps.
+    *
+    * @param args ignored
+    */
+   public static void main( final String[] args ) {
+//      makeCopies();
+//      copyNotes();
+//      serializeTimelines();
+//      serializeTimelines( ROOT_IN_DIR, ROOT_OUT_DIR );
+      serializeXmiTimelines( ROOT_IN_DIR, ROOT_OUT_DIR );
+   }
+
+   /**
+    * For each entry of rootInDir, builds an annotation store from the entry and its
+    * {@code <name>.txt} file and writes the timeline to {@code rootOutDir/<name>/<name>}.
+    * Entries without time relations are skipped.
+    *
+    * @param rootInDir  directory whose entries are document directories
+    * @param rootOutDir directory under which one output directory per document is created
+    */
+   static private void serializeTimelines( final File rootInDir, final File rootOutDir ) {
+      final File[] subDirs = rootInDir.listFiles();
+      if ( subDirs == null ) {
+         return;
+      }
+      for ( File subDir : subDirs ) {
+         final AnnotationStore annotationStore
+               = AnnotationStoreFactory
+               .createAnnotationCollection2( subDir, new File( subDir, subDir.getName() + ".txt" ) );
+         if ( annotationStore.getTimeRelations().isEmpty() ) {
+            continue;
+         }
+         System.out.println( "Serializing " + subDir.getName() );
+         final Timeline timeline = TimelineFactory.createTimeline( subDir.getName(), annotationStore );
+         final File outSubDir = new File( rootOutDir, subDir.getName() );
+         outSubDir.mkdirs();
+         TimelineAnaforaWriter5.writeTimeline( outSubDir.getPath() + "/" + outSubDir.getName(), timeline );
+      }
+   }
+
+   /**
+    * For each entry of rootInDir, builds an annotation store from the entry's path and writes
+    * the timeline to {@code rootOutDir/<entry name>}.  Entries without time relations are skipped.
+    *
+    * @param rootInDir  directory holding the xmi files
+    * @param rootOutDir directory receiving the serialized timelines; created when first needed
+    */
+   static private void serializeXmiTimelines( final File rootInDir, final File rootOutDir ) {
+      final File[] xmiFiles = rootInDir.listFiles();
+      if ( xmiFiles == null ) {
+         return;
+      }
+      for ( File xmiFile : xmiFiles ) {
+         final AnnotationStore annotationStore
+               = AnnotationStoreFactory.createAnnotationCollection( xmiFile.getPath() );
+         if ( annotationStore.getTimeRelations().isEmpty() ) {
+            continue;
+         }
+         System.out.println( "Serializing " + xmiFile.getName() );
+         final Timeline timeline = TimelineFactory.createTimeline( xmiFile.getName(), annotationStore );
+         rootOutDir.mkdirs();
+         TimelineAnaforaWriter5.writeTimeline( rootOutDir.getPath() + "/" + xmiFile.getName(), timeline );
+      }
+   }
+
+
+   /**
+    * For each entry of {@code IAA_IN_DIR}, builds an annotation store and writes the timeline
+    * to {@code SERIALIZATIONS/<entry name>}.  Entries without time relations are skipped.
+    */
+   static private void serializeTimelines() {
+      final File[] subDirs = IAA_IN_DIR.listFiles();
+      if ( subDirs == null ) {
+         return;
+      }
+      for ( File subDir : subDirs ) {
+         final AnnotationStore annotationStore
+               = AnnotationStoreFactory.createAnnotationCollection2( subDir.getPath(), subDir.getName() );
+         if ( annotationStore.getTimeRelations().isEmpty() ) {
+            continue;
+         }
+         System.out.println( "Serializing " + subDir.getName() );
+         final Timeline timeline = TimelineFactory.createTimeline( subDir.getName(), annotationStore );
+         // create the target directory first, as the other serialize methods do
+         new File( SERIALIZATIONS ).mkdirs();
+         final String outputPath = SERIALIZATIONS + "/" + subDir.getName();
+         TimelineAnaforaWriter5.writeTimeline( outputPath, timeline );
+      }
+   }
+
+
+   /**
+    * @param fileName name of a file in {@code ROOT_DIR}
+    * @return the part of the name before its first '.', or the whole name if it contains no '.'
+    */
+   static private String getSetName( final String fileName ) {
+      final int dotIndex = fileName.indexOf( '.' );
+      return dotIndex < 0 ? fileName : fileName.substring( 0, dotIndex );
+   }
+
+   /**
+    * Copies a file, reporting a failure on standard error instead of throwing.
+    *
+    * @param inputFile  file to copy
+    * @param outputFile destination; an existing destination makes the copy fail
+    */
+   static private void copyFile( final File inputFile, final File outputFile ) {
+      try {
+         Files.copy( inputFile.toPath(), outputFile.toPath() );
+      } catch ( IOException ioE ) {
+         System.err.println( ioE.getMessage() );
+      }
+   }
+
+   /**
+    * Groups the files of {@code ROOT_DIR} by set name (the name up to the first '.') and copies
+    * each group into its own directory {@code IAA_IN_DIR/<set name>}.
+    */
+   static private void makeCopies() {
+      final String[] fileNames = ROOT_DIR.list();
+      // list() returns null when ROOT_DIR is not a readable directory
+      if ( fileNames == null ) {
+         return;
+      }
+      final Collection<String> setNames = new HashSet<>();
+      for ( String fileName : fileNames ) {
+         setNames.add( getSetName( fileName ) );
+      }
+      for ( String setName : setNames ) {
+         final File setDir = new File( IAA_IN_DIR, setName );
+         setDir.mkdirs();
+         for ( String fileName : fileNames ) {
+            // exact set-name match: a prefix test would also put "a10.x" into set "a1"
+            if ( !getSetName( fileName ).equals( setName ) ) {
+               continue;
+            }
+            final File inputFile = new File( ROOT_DIR, fileName );
+            final File outputFile = new File( setDir, fileName );
+            System.out.println( inputFile.getPath() + " > " + outputFile.getPath() );
+            copyFile( inputFile, outputFile );
+         }
+      }
+   }
+
+
+   /**
+    * Copies the note of each document directory (the file named like the directory itself)
+    * into the directory of the same name under {@code IAA_IN_DIR}.  Directories without a
+    * note are reported on standard output.
+    */
+   static private void copyNotes() {
+      final File rootDir = new File( "C:/Spiffy/Data/IAA/THYME5/ColonCancer" );
+      final File[] subDirs = rootDir.listFiles();
+      // listFiles() returns null when rootDir is not a readable directory
+      if ( subDirs == null ) {
+         return;
+      }
+      for ( File subDir : subDirs ) {
+         final File noteFile = new File( subDir, subDir.getName() );
+         if ( !noteFile.exists() ) {
+            System.out.println( "No note for " + subDir.getName() );
+            continue;
+         }
+         final File outputDir = new File( IAA_IN_DIR, subDir.getName() );
+         final File outputFile = new File( outputDir, subDir.getName() );
+         copyFile( noteFile, outputFile );
+      }
+   }
+
+
+}

Added: ctakes/sandbox/timelanes/org/chboston/cnlp/timeline/util/SimpleStoreWriter.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/timelanes/org/chboston/cnlp/timeline/util/SimpleStoreWriter.java?rev=1660963&view=auto
==============================================================================
--- ctakes/sandbox/timelanes/org/chboston/cnlp/timeline/util/SimpleStoreWriter.java (added)
+++ ctakes/sandbox/timelanes/org/chboston/cnlp/timeline/util/SimpleStoreWriter.java Thu Feb 19 18:06:13 2015
@@ -0,0 +1,226 @@
+package org.chboston.cnlp.timeline.util;
+
+import org.chboston.cnlp.nlp.annotation.annotation.store.AnnotationStore;
+import org.chboston.cnlp.nlp.annotation.annotation.store.AnnotationStoreFactory;
+import org.chboston.cnlp.nlp.annotation.attribute.DefinedAttributeType;
+import org.chboston.cnlp.nlp.annotation.coreference.CoreferenceChain;
+import org.chboston.cnlp.nlp.annotation.entity.Entity;
+import org.chboston.cnlp.nlp.annotation.relation.Relation;
+import org.chboston.cnlp.timeline.timeline.Timeline;
+import org.chboston.cnlp.timeline.timeline.TimelineFactory;
+import org.chboston.cnlp.timeline.timespan.plus.PointedTimeSpan;
+
+import java.io.*;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.logging.FileHandler;
+import java.util.logging.LogRecord;
+import java.util.logging.Logger;
+import java.util.logging.SimpleFormatter;
+
+/**
+ * Command-line utility that writes, for every annotation store found under an input directory,
+ * a plain-text listing of its entities, events, times, coreference chains, relations, tlinks and
+ * timeline spans, plus a summary file with per-document and total counts and a file listing the
+ * messages logged by the "TimeSpanFactory" and "TLinkTypeArray3" loggers.
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 1/12/2015
+ */
+public class SimpleStoreWriter {
+
+   static private final Logger LOGGER = Logger.getLogger( "SimpleStoreWriter" );
+
+
+//   static private final File ROOT_IN_DIR
+//         = new File( "C:\\Spiffy\\prj_darth_phenome\\data\\internal\\xmi\\27Notes_Jan9_2015" );
+//   static private final File ROOT_OUT_DIR
+//         = new File( "C:\\Spiffy\\prj_darth_phenome\\data\\internal\\xmi\\27Notes_Jan9_2015\\simple2" );
+   // true to read the input directory with runXmiDir, false to read it with runAnaforaDir
+   static private final boolean XMI = false;
+   static private final File ROOT_IN_DIR
+//      = new File( "C:\\Spiffy\\prj_thyme\\data\\internal\\x_eval\\annotations\\layer\\aggregate\\ctakes\\colon\\test" );
+         = new File( "C:\\Spiffy\\prj_thyme\\data\\internal\\annotations\\release_gold\\ColonCancer" );
+   static private final File ROOT_OUT_DIR
+//         = new File( "C:\\Spiffy\\prj_thyme\\output\\temp\\x_eval\\annotations\\layer\\aggregate\\ctakes\\colon\\negTest" );
+         = new File( "C:\\Spiffy\\prj_thyme\\output\\temp\\x_eval\\annotations\\layer\\aggregate\\gold\\colon\\negTest" );
+
+   // Shared layout of one annotation line: class type | text | start,end
+   static private final String LINE_FORMAT = "%1$20s | %2$30s | %3$3d,%4$3d";
+
+   // Messages seen by the error formatter and running totals, accumulated over all documents of a run
+   static private final Collection<String> _unparsedTimexList = new ArrayList<>();
+   static private int _entities;
+   static private int _events;
+   static private int _timex3s;
+   static private int _chains;
+   static private int _relations;
+   static private int _tlinks;
+   static private int _spans;
+
+
+   /**
+    * Processes every document under rootInDir, then writes "Summary.txt" (per-document and total
+    * counts) and "Summary.err.txt" (collected log messages) into rootOutDir.
+    *
+    * @param rootInDir  directory searched recursively for input files
+    * @param rootOutDir directory receiving all output; created if necessary
+    */
+   static private void writeXmiStores( final File rootInDir, final File rootOutDir ) {
+      rootOutDir.mkdirs();
+      final StringBuilder sb = XMI ? runXmiDir( rootInDir, rootOutDir ) : runAnaforaDir( rootInDir, rootOutDir );
+      final String name = "Summary";
+      // the summaries belong in the directory passed by the caller, next to the per-document files
+      final File summaryFile = new File( rootOutDir, name + ".txt" );
+      try ( final Writer writer = new BufferedWriter( new FileWriter( summaryFile ) ) ) {
+         writer.write( String.format( "%1$40s   NE Evnt Time  Chn Rltn TLnk Span\n", name ) );
+         writer.write( sb.toString() );
+         writer.write( "\n" );
+         writer.write( String.format( "%1$40s %2$4d %3$4d %4$4d %5$4d %6$4d %7$4d %8$4d\n", "Total",
+               _entities, _events, _timex3s, _chains, _relations, _tlinks, _spans ) );
+      } catch ( IOException ioE ) {
+         LOGGER.log( java.util.logging.Level.SEVERE, ioE.getMessage(), ioE );
+      }
+      final File errorSummaryFile = new File( rootOutDir, name + ".err.txt" );
+      try ( final Writer writer = new BufferedWriter( new FileWriter( errorSummaryFile ) ) ) {
+         for ( String unparsed : _unparsedTimexList ) {
+            writer.write( unparsed + "\n" );
+         }
+      } catch ( IOException ioE ) {
+         LOGGER.log( java.util.logging.Level.SEVERE, ioE.getMessage(), ioE );
+      }
+   }
+
+   /**
+    * Recursively processes every file under xmiDir.
+    *
+    * @param xmiDir     directory to walk
+    * @param rootOutDir directory receiving the per-document output
+    * @return the concatenated count lines of all processed files; empty if xmiDir cannot be listed
+    */
+   static private StringBuilder runXmiDir( final File xmiDir, final File rootOutDir ) {
+      final StringBuilder sb = new StringBuilder();
+      final File[] xmiFiles = xmiDir.listFiles();
+      if ( xmiFiles == null ) {
+         return sb;
+      }
+      for ( File xmiFile : xmiFiles ) {
+         if ( xmiFile.isDirectory() ) {
+            sb.append( runXmiDir( xmiFile, rootOutDir ) );
+            continue;
+         }
+         final AnnotationStore annotationStore
+               = AnnotationStoreFactory.createAnnotationCollection( xmiFile.getPath() );
+         final String countText = writeAnnotationStore( rootOutDir, xmiFile.getName(), annotationStore );
+         sb.append( countText );
+      }
+      return sb;
+   }
+
+   /**
+    * Recursively processes every file under anaforaDir whose name ends with ".txt".
+    *
+    * @param anaforaDir directory to walk
+    * @param rootOutDir directory receiving the per-document output
+    * @return the concatenated count lines of all processed files; empty if anaforaDir cannot be listed
+    */
+   static private StringBuilder runAnaforaDir( final File anaforaDir, final File rootOutDir ) {
+      final StringBuilder sb = new StringBuilder();
+      final File[] anaforaFiles = anaforaDir.listFiles();
+      if ( anaforaFiles == null ) {
+         return sb;
+      }
+      for ( File anaforaFile : anaforaFiles ) {
+         if ( anaforaFile.isDirectory() ) {
+            sb.append( runAnaforaDir( anaforaFile, rootOutDir ) );
+            continue;
+         }
+         if ( !anaforaFile.getName().endsWith( ".txt" ) ) {
+            continue;
+         }
+         final AnnotationStore annotationStore
+               = AnnotationStoreFactory.createAnnotationCollection( anaforaFile.getPath() );
+         final String countText = writeAnnotationStore( rootOutDir, anaforaFile.getName(), annotationStore );
+         sb.append( countText );
+      }
+      return sb;
+   }
+
+
+   /**
+    * Writes {@code <name>.simple.txt} for one annotation store.  While writing, a file handler on
+    * {@code <name>.error.txt} is attached to the "TimeSpanFactory" and "TLinkTypeArray3" loggers;
+    * it is detached and closed again even if writing fails.
+    *
+    * @param rootOutDir      directory receiving the two files
+    * @param name            document name, used for the file names and the timeline
+    * @param annotationStore store to list
+    * @return the document's summary line (name and counts), or an empty string after an I/O failure
+    */
+   static private String writeAnnotationStore( final File rootOutDir, final String name,
+                                               final AnnotationStore annotationStore ) {
+      final File outputFile = new File( rootOutDir, name + ".simple.txt" );
+      final File errorFile = new File( rootOutDir, name + ".error.txt" );
+      final Logger timeSpanFactoryLogger = Logger.getLogger( "TimeSpanFactory" );
+      final Logger tlinkCloserLogger = Logger.getLogger( "TLinkTypeArray3" );
+      LOGGER.info( "Writing Simple " + outputFile.getPath() );
+      try ( final Writer writer = new BufferedWriter( new FileWriter( outputFile ) ) ) {
+         final FileHandler errorHandler = new FileHandler( errorFile.getPath() );
+         final SimpleFormatter errorFormatter = new SimpleFormatter() {
+            @Override
+            public synchronized String format( final LogRecord record ) {
+               // remember every message so that it also ends up in the error summary
+               _unparsedTimexList.add( formatMessage( record ) );
+               return formatMessage( record ) + "\n";
+            }
+         };
+         errorHandler.setFormatter( errorFormatter );
+         timeSpanFactoryLogger.addHandler( errorHandler );
+         tlinkCloserLogger.addHandler( errorHandler );
+         try {
+            final String countText = writeStoreContent( writer, name, annotationStore );
+            return String.format( "%1$40s ", name ) + countText;
+         } finally {
+            // always detach and close, otherwise a failure leaves the error file open
+            // and the handler attached to both loggers
+            timeSpanFactoryLogger.removeHandler( errorHandler );
+            tlinkCloserLogger.removeHandler( errorHandler );
+            errorHandler.close();
+         }
+      } catch ( IOException ioE ) {
+         LOGGER.log( java.util.logging.Level.SEVERE, ioE.getMessage(), ioE );
+      }
+      return "";
+   }
+
+   /**
+    * Writes one formatted line per entity.
+    *
+    * @param writer   destination
+    * @param entities entities to list
+    * @throws IOException if writing fails
+    */
+   static private void writeEntities( final Writer writer, final Collection<Entity> entities )
+         throws IOException {
+      for ( Entity entity : entities ) {
+         final String lineText = String.format( LINE_FORMAT,
+               entity.getClassType(), entity.getSpannedTextRepresentation(),
+               entity.getTextSpan().getStartIndex(), entity.getTextSpan().getEndIndex() );
+         writer.write( lineText + "\n" );
+      }
+   }
+
+   /**
+    * Writes the body of a document listing and adds the document's counts to the running totals.
+    *
+    * @param writer          destination
+    * @param name            document name handed to the timeline factory
+    * @param annotationStore store to list
+    * @return the line of counts for this document, terminated by a newline
+    * @throws IOException if writing fails
+    */
+   static private String writeStoreContent( final Writer writer, final String name,
+                                            final AnnotationStore annotationStore ) throws IOException {
+      Collection<Entity> entities = annotationStore.getNamedEntities();
+      final int entityCount = entities.size();
+      _entities += entityCount;
+      writeEntities( writer, entities );
+
+      entities = annotationStore.getEvents();
+      final int eventCount = entities.size();
+      _events += eventCount;
+      writeEntities( writer, entities );
+
+      entities = annotationStore.getTimes();
+      final int timesCount = entities.size();
+      _timex3s += timesCount;
+      writeEntities( writer, entities );
+
+      final Collection<CoreferenceChain> chains  = annotationStore.getCoreferenceChains();
+      int chainCount = 0;
+      for ( CoreferenceChain chain : chains ) {
+         // only chains longer than one whose text representation contains " ... " are listed and counted
+         if ( chain.getChainLength() == 1 || !chain.getSpannedTextRepresentation().contains( " ... " ) ) {
+            continue;
+         }
+         final String lineText = String.format( LINE_FORMAT,
+               chain.getClassType(), chain.getSpannedTextRepresentation(),
+               chain.getTextSpan().getStartIndex(), chain.getTextSpan().getEndIndex() );
+         writer.write( lineText + "\n" );
+         chainCount++;
+      }
+      _chains += chainCount;
+
+      Collection<Relation> relations = annotationStore.getUmlsRelations();
+      final int relationCount = relations.size();
+      _relations += relationCount;
+      for ( Relation relation : relations ) {
+         final String lineText = String.format( LINE_FORMAT,
+               relation.getClassType(), relation.getSpannedTextRepresentation(),
+               relation.getTextSpan().getStartIndex(), relation.getTextSpan().getEndIndex() );
+         writer.write( lineText + "\n" );
+      }
+
+      relations = annotationStore.getTimeRelations();
+      final int tlinkCount = relations.size();
+      _tlinks += tlinkCount;
+      for ( Relation relation : relations ) {
+         // tlinks are shown as "first entity  RELATION_TYPE  second entity"
+         final String tlinkType = relation.getFirstEntity().getSpannedTextRepresentation()
+                                  + " " + relation.getAttributeValue( DefinedAttributeType.RELATION_TYPE )
+                                  + " " + relation.getSecondEntity().getSpannedTextRepresentation();
+         final String lineText = String.format( LINE_FORMAT,
+               relation.getClassType(), tlinkType,
+               relation.getTextSpan().getStartIndex(), relation.getTextSpan().getEndIndex() );
+         writer.write( lineText + "\n" );
+      }
+
+      final Timeline timeline = TimelineFactory.createTimeline( name, annotationStore );
+      final int spanCount = timeline.getTimeSpans().size();
+      _spans += spanCount;
+      for ( PointedTimeSpan timeSpan : timeline ) {
+         writer.write( timeSpan + "\n" );
+      }
+      writer.write( "\n\n" );
+      writer.write( annotationStore.getDocumentText() );
+      writer.write( "\n\n" );
+      writer.write( "  NE Evnt Time  Chn Rltn TLnk Span\n" );
+      final String countText =
+            String.format( "%1$4d %2$4d %3$4d %4$4d %5$4d %6$4d %7$4d\n",
+                  entityCount, eventCount, timesCount, chainCount,
+                  relationCount, tlinkCount, spanCount );
+      writer.write( countText + "\n" );
+      return countText;
+   }
+
+   /**
+    * Runs the writer on the hard-coded input and output directories.
+    *
+    * @param args ignored
+    */
+   public static void main( final String... args ) {
+      writeXmiStores( ROOT_IN_DIR, ROOT_OUT_DIR );
+   }
+}

Added: ctakes/sandbox/timelanes/org/chboston/cnlp/xmi/XmiEolFixer.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/timelanes/org/chboston/cnlp/xmi/XmiEolFixer.java?rev=1660963&view=auto
==============================================================================
--- ctakes/sandbox/timelanes/org/chboston/cnlp/xmi/XmiEolFixer.java (added)
+++ ctakes/sandbox/timelanes/org/chboston/cnlp/xmi/XmiEolFixer.java Thu Feb 19 18:06:13 2015
@@ -0,0 +1,75 @@
+package org.chboston.cnlp.xmi;
+
+import java.io.*;
+import java.util.logging.Logger;
+import java.util.regex.Pattern;
+
+/**
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 10/13/2014
+ */
+final public class XmiEolFixer {
+
+   static private final Logger LOGGER = Logger.getLogger( "XmiEolFixer" );
+
+   private XmiEolFixer() {
+   }
+
+   static private final String INPUT_DIR_PATH
+         = "C:\\Spiffy\\prj_darth_phenome\\data\\internal\\xmi\\27Notes_Jan9_2015";
+   static private final String OUTPUT_DIR_PATH
+         = "C:\\Spiffy\\prj_darth_phenome\\data\\internal\\xmi\\27Notes_Jan9_2015\\xml";
+
+
+   public static void main( String[] args ) {
+      final File inputDir = new File( INPUT_DIR_PATH );
+      final File[] files = inputDir.listFiles();
+      if ( files == null ) {
+         LOGGER.warning( "No files in " + inputDir.getPath() );
+         System.exit( 0 );
+      }
+      final Pattern pattern = Pattern.compile( "><" );
+      final char[] buffer = new char[ 1024 ];
+      for ( File file : files ) {
+         if ( !file.getName().endsWith( ".xmi" ) ) {
+            continue;
+         }
+         final String filePathOld = file.getPath();
+         final String filePathNew = OUTPUT_DIR_PATH + "/" + file.getName() + ".xml";
+         try ( BufferedReader reader = new BufferedReader( new FileReader( filePathOld ) );
+               Writer writer = new BufferedWriter( new FileWriter( filePathNew ) ) ) {
+            int length = reader.read( buffer );
+            while ( length > 0 ) {
+               final String text = new String( buffer, 0, length );
+               if ( text.startsWith( "><" ) ) {
+                  writer.write( ">\n<" );
+               } else if ( text.startsWith( "<" ) ) {
+                  writer.write( "<" );
+               }
+               final String[] lines = pattern.split( text );
+               if ( lines.length == 1 ) {
+                  writer.write( lines[ 0 ] );
+               } else if ( lines.length > 1 ) {
+                  writer.write( lines[ 0 ] + ">\n" );
+                  for ( int i = 1; i < lines.length - 1; i++ ) {
+                     if ( !lines[ i ].isEmpty() ) {
+                        writer.write( "<" + lines[ i ] + ">\n" );
+                     }
+                  }
+                  writer.write( "<" + lines[ lines.length - 1 ] );
+               }
+               if ( text.endsWith( "><" ) && text.length() > 2 ) {
+                  writer.write( ">\n<" );
+               } else if ( text.endsWith( ">" ) ) {
+                  writer.write( ">\n" );
+               }
+               length = reader.read( buffer );
+            }
+         } catch ( IOException ioE ) {
+            LOGGER.severe( ioE.getMessage() );
+         }
+      }
+   }
+
+}

Added: ctakes/sandbox/timelanes/org/chboston/cnlp/xmi/parser/UimaXmiParser.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/timelanes/org/chboston/cnlp/xmi/parser/UimaXmiParser.java?rev=1660963&view=auto
==============================================================================
--- ctakes/sandbox/timelanes/org/chboston/cnlp/xmi/parser/UimaXmiParser.java (added)
+++ ctakes/sandbox/timelanes/org/chboston/cnlp/xmi/parser/UimaXmiParser.java Thu Feb 19 18:06:13 2015
@@ -0,0 +1,695 @@
+package org.chboston.cnlp.xmi.parser;
+
+import org.chboston.cnlp.nlp.annotation.annotation.store.ImmutableAnnotationStore;
+import org.chboston.cnlp.nlp.annotation.attribute.AttributeType;
+import org.chboston.cnlp.nlp.annotation.attribute.AttributeTypeFactory;
+import org.chboston.cnlp.nlp.annotation.attribute.DefaultAttribute;
+import org.chboston.cnlp.nlp.annotation.attribute.DefinedAttributeType;
+import org.chboston.cnlp.nlp.annotation.classtype.ClassType;
+import org.chboston.cnlp.nlp.annotation.classtype.CustomClassType;
+import org.chboston.cnlp.nlp.annotation.classtype.SemanticClassType;
+import org.chboston.cnlp.nlp.annotation.classtype.TemporalClassType;
+import org.chboston.cnlp.nlp.annotation.coreference.CoreferenceChain;
+import org.chboston.cnlp.nlp.annotation.coreference.CoreferenceChainSpanComparator;
+import org.chboston.cnlp.nlp.annotation.coreference.CoreferenceFactory;
+import org.chboston.cnlp.nlp.annotation.entity.DefaultEntity;
+import org.chboston.cnlp.nlp.annotation.entity.Entity;
+import org.chboston.cnlp.nlp.annotation.parser.AbstractAnnotationXmlParser;
+import org.chboston.cnlp.nlp.annotation.relation.DefaultRelation;
+import org.chboston.cnlp.nlp.annotation.relation.Relation;
+import org.chboston.cnlp.nlp.annotation.textspan.DefaultTextSpan;
+import org.chboston.cnlp.nlp.annotation.textspan.DiscontiguousTextSpan;
+import org.chboston.cnlp.nlp.annotation.textspan.TextSpan;
+import org.jdom.Attribute;
+import org.jdom.Document;
+import org.jdom.Element;
+import org.jdom.JDOMException;
+import org.jdom.input.SAXBuilder;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.*;
+import java.util.logging.Logger;
+import java.util.regex.Pattern;
+
+/**
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 10/13/2014
+ */
+final public class UimaXmiParser extends AbstractAnnotationXmlParser {
+
+   static private final Logger LOGGER = Logger.getLogger( "UimaXmiParser" );
+
+   // TODO Refactor to use XmiTag
+   // Name the root element must have for canParse to accept a file
+   static private final String ROOT_ELEMENT_NAME = "XMI";
+   // Name of the child element whose DOCUMENT_TEXT attribute holds the document text
+   static private final String DOCUMENT_TEXT_NAME = "Sofa";
+   // Names of the event / time / temporal-link elements looked up under the root element
+   static private final String EVENT_MENTION = "EventMention";
+   static private final String TIME_MENTION = "TimeMention";
+   static private final String DATE_ANNOTATION = "DateAnnotation";
+   static private final String TLINK = "TemporalTextRelation";
+   // NOTE(review): same value as UMLS_CONCEPT below - confirm whether both constants are needed
+   static private final String UMLS = "UmlsConcept";
+
+   // Element names indexed by id in parseFile so that event mentions can reach their properties
+   static private final String EVENT = "Event";
+   static private final String EVENT_PROPERTIES = "EventProperties";
+
+   // Attribute on an EventMention that holds the id of its Event element
+   static private final String EVENT_ID_KEY = "event";
+   // Attribute on an Event element that holds the id of its EventProperties element
+   static private final String PROPERTIES_ID_KEY = "properties";
+
+
+   // Names of the named-entity mention elements read by getEntityMap
+   static private final String SIGN_SYMPTOM = "SignSymptomMention";
+   static private final String PROCEDURE = "ProcedureMention";
+   static private final String DISEASE = "DiseaseDisorderMention";
+   static private final String MEDICATION = "MedicationMention";
+   static private final String ANATOMIC_SITE = "AnatomicalSiteMention";
+
+   // Mention attribute holding whitespace-separated ids of concept elements, and the concept element name
+   static private final String CONCEPT_ARRAY = "ontologyConceptArr";
+   static private final String UMLS_CONCEPT = "UmlsConcept";
+
+   // Mostly xml attribute names; RELATION_ARG is an element name whose ARGUMENT attribute holds an entity id
+   static private final String ID = "id";
+   static private final String DOCUMENT_TEXT = "sofaString";
+   static private final String BEGIN = "begin";
+   static private final String END = "end";
+   static private final String DOC_TIME_REL = "docTimeRel";
+   static private final String POLARITY = "polarity";
+   static private final String LINK_TYPE = "category";
+   static private final String LINK_ARG1 = "arg1";
+   static private final String LINK_ARG2 = "arg2";
+   static private final String RELATION_ARG = "RelationArgument";
+   static private final String ARGUMENT = "argument";
+   static private final String CUI = "cui";
+   static private final String TUI = "tui";
+
+
+   // Coreference element names, and the attribute listing the argument ids of a chain
+   static private final String COREF_RELATION = "CoreferenceRelation";
+   static private final String COREF_CHAIN = "CollectionTextRelation";
+   static private final String COREF_MEMBERS = "members";
+
+
+   // NOTE(review): hard-coded developer path - presumably only for ad-hoc test runs; verify against its users
+   static private final String INPUT_DIR_PATH = "C:\\Spiffy\\prj_darth_phenome\\data\\internal\\xmi\\Oct10_2014";
+
+   /**
+    * Decides whether a file can be handled by this parser by building its xml document and
+    * comparing the name of the root element with the expected xmi root name.
+    * Any failure to read or parse the file is logged and reported as "cannot parse".
+    *
+    * @param filePath path to file with annotation information
+    * @return true if this AnnotationsParser can properly handle the given file
+    */
+   static public boolean canParse( final String filePath ) {
+      final SAXBuilder builder = new SAXBuilder();
+      try {
+         final Element root = builder.build( filePath ).getRootElement();
+         if ( root == null ) {
+            return false;
+         }
+         return root.getName().equals( ROOT_ELEMENT_NAME );
+      } catch ( JDOMException | IOException buildE ) {
+         LOGGER.severe( buildE.getMessage() );
+         return false;
+      }
+   }
+
+   /**
+    * No-op pre-parse step: nothing is read or checked.
+    *
+    * @param xmlFilePath path to the xml file; ignored
+    * @return always true
+    */
+   public boolean preParseFile( final String xmlFilePath ) {
+      return true;
+   }
+
+   /**
+    * {@inheritDoc}
+    * <p>
+    * Reads the document text, entities, events, times, temporal links and coreference information
+    * from the xmi file and stores the result in {@code _annotationStore}.
+    * Returns false for a null, empty or unreadable path and when the xml cannot be built.
+    */
+   @Override
+   public boolean parseFile( final String xmlFilePath ) {
+      reset();
+      if ( xmlFilePath == null || xmlFilePath.isEmpty() ) {
+         return false;
+      }
+      final File xmlFile = new File( xmlFilePath );
+      if ( !xmlFile.canRead() ) {
+         return false;
+      }
+      final SAXBuilder saxBuilder = new SAXBuilder();
+      try {
+         final Document document = saxBuilder.build( xmlFile );
+         final Element rootElement = document.getRootElement();
+         // The document text must be stored before entities are read: getEntities() fetches it again
+         final String documentText = getDocumentText( rootElement );
+         setDocumentText( documentText );
+         // Index Event and EventProperties elements by id so event mentions can pick up their properties
+         final Map<String, Element> eventMap = createElementIdMap( rootElement, EVENT );
+         final Map<String, Element> eventPropertiesMap = createElementIdMap( rootElement, EVENT_PROPERTIES );
+         final Map<String, Entity> entityMap = getEntityMap( rootElement, eventMap, eventPropertiesMap );
+         final List<Entity> entityList = XmiEntitySegregator.getNamedEntities( entityMap.values() );
+         final List<Entity> eventList = XmiEntitySegregator.getEvents( entityMap.values() );
+         final List<Entity> timexList = XmiEntitySegregator.getTimes( entityMap.values() );
+         // Relations refer to RelationArgument ids; this map resolves them to entity ids
+         final Map<String, String> relationArgMap = getRelationArgMap( rootElement );
+         final List<Relation> tlinkRelationList = getTLinks( rootElement, entityMap, relationArgMap );
+         // Pairwise coreference relations and whole chains are pooled before chains are created
+         final List<Collection<TextSpan>> coreferenceTextSpans = new ArrayList<>();
+         coreferenceTextSpans.addAll( getCorefLinks( rootElement, entityMap, relationArgMap ) );
+         coreferenceTextSpans.addAll( getCorefChains( rootElement, entityMap, relationArgMap ) );
+         Collections.sort( coreferenceTextSpans, CoreferenceChainSpanComparator.getInstance() );
+         List<CoreferenceChain> coreferenceChainList = Collections.emptyList();
+         if ( !entityList.isEmpty() || !eventList.isEmpty() || !timexList.isEmpty() ) {
+            coreferenceChainList = CoreferenceFactory.createCoreferenceChains( coreferenceTextSpans, entityList,
+                  eventList, timexList );
+         }
+         // -1 marks an unknown word count when there is no document text
+         int wordCount = -1;
+         if ( documentText != null && !documentText.isEmpty() ) {
+            wordCount = documentText.split( "\\s+" ).length;
+         }
+         final ImmutableAnnotationStore.AnnoteCollectBuilder builder
+               = new ImmutableAnnotationStore.AnnoteCollectBuilder();
+         builder.entities( entityList ).events( eventList ).times( timexList );
+//         builder.relations( umlsRelationList )
+         builder.timeRelations( tlinkRelationList );
+//         builder.coreferenceTextSpans( coreferenceChains );
+         builder.coreferenceChains( coreferenceChainList ).wordCount( wordCount );
+         if ( documentText != null && !documentText.isEmpty() ) {
+            builder.documentText( documentText );
+         }
+         _annotationStore = builder.build();
+         return true;
+      } catch ( JDOMException jdomE ) {
+         LOGGER.severe( jdomE.getMessage() );
+         return false;
+      } catch ( IOException ioE ) {
+         LOGGER.severe( ioE.getMessage() );
+         return false;
+      }
+   }
+
+
+   /**
+    * Fetches the document text stored in the {@code sofaString} attribute of the first
+    * {@code Sofa} child of the root element.
+    *
+    * @param rootElement the root xml element in an annotation xml file
+    * @return the text with each literal {@code "&#10;"} sequence replaced by a newline, or an empty
+    * String when the file has no Sofa element or the element has no sofaString attribute
+    */
+   static private String getDocumentText( final Element rootElement ) {
+      final Element child = getChild( rootElement, DOCUMENT_TEXT_NAME );
+      if ( child == null ) {
+         // getChild returns null when there is no such element; callers already cope with empty text
+         LOGGER.warning( "No " + DOCUMENT_TEXT_NAME + " element, document text is unavailable" );
+         return "";
+      }
+      final String text = child.getAttributeValue( DOCUMENT_TEXT );
+      if ( text == null ) {
+         LOGGER.warning( "No " + DOCUMENT_TEXT + " attribute, document text is unavailable" );
+         return "";
+      }
+      return text.replace( "&#10;", "\n" );
+   }
+
+   /**
+    * Collects the entities of every supported mention type into a single map.
+    * Only event mentions are given the event lookup maps; all other types pass null for both.
+    *
+    * @param rootElement        the root xml element in an annotation xml file
+    * @param eventMap           map of element ids and Event elements
+    * @param eventPropertiesMap map of element ids and EventProperties elements
+    * @return map of element ids and the Entities created for them
+    */
+   private Map<String, Entity> getEntityMap( final Element rootElement,
+                                             final Map<String, Element> eventMap,
+                                             final Map<String, Element> eventPropertiesMap ) {
+      final Map<String, Entity> allEntities = new HashMap<>();
+      final String[] semanticMentionNames = { SIGN_SYMPTOM, PROCEDURE, DISEASE, MEDICATION, ANATOMIC_SITE };
+      for ( String mentionName : semanticMentionNames ) {
+         allEntities.putAll( getEntities( rootElement, mentionName, null, null ) );
+      }
+      allEntities.putAll( getEntities( rootElement, EVENT_MENTION, eventMap, eventPropertiesMap ) );
+      final String[] timeMentionNames = { TIME_MENTION, DATE_ANNOTATION };
+      for ( String mentionName : timeMentionNames ) {
+         allEntities.putAll( getEntities( rootElement, mentionName, null, null ) );
+      }
+      return allEntities;
+   }
+
+
+   /**
+    * Maps the id of each {@code RelationArgument} element to the value of its {@code argument} attribute.
+    *
+    * @param rootElement the root xml element in an annotation xml file
+    * @return map of relation argument ids and the entity ids that they point to
+    */
+   static private Map<String, String> getRelationArgMap( final Element rootElement ) {
+      final Map<String, String> entityIdsByArgId = new HashMap<>();
+      for ( Element relationArg : getChildren( rootElement, RELATION_ARG ) ) {
+         final String relationArgId = getElementId( relationArg );
+         entityIdsByArgId.put( relationArgId, relationArg.getAttributeValue( ARGUMENT ) );
+      }
+      //TODO
+      return entityIdsByArgId;
+   }
+
+
+   /**
+    * Indexes all child elements with the given name by their element id.
+    *
+    * @param rootElement      the root xml element in an annotation xml file
+    * @param elementClassName name of the child elements to index
+    * @return map of element ids and elements
+    */
+   static private Map<String, Element> createElementIdMap( final Element rootElement, final String elementClassName ) {
+      final List<Element> matches = getChildren( rootElement, elementClassName );
+      final Map<String, Element> elementsById = new HashMap<>( matches.size() );
+      for ( Element match : matches ) {
+         final String id = getElementId( match );
+         if ( id == null ) {
+            continue;
+         }
+         elementsById.put( id, match );
+      }
+      return elementsById;
+   }
+
+
+   /**
+    * Creates an Entity for each child element with the given name.
+    * Elements with a bad text span, or whose spanned text is blank, are skipped.
+    * Besides its own xml attributes an entity receives: the attributes of its event properties (event
+    * mentions), a fixed "XMI_TIMEX" attribute (time and date mentions), or the attributes of its
+    * referenced concepts (everything else), plus a unique id attribute.
+    *
+    * @param rootElement        the root xml element in an annotation xml file
+    * @param classElementName   name of the mention elements to read
+    * @param eventMap           map of element ids and Event elements; only used for event mentions
+    * @param eventPropertiesMap map of element ids and EventProperties elements; only used for event mentions
+    * @return map of element ids and the Entities created for them
+    */
+   private Map<String, Entity> getEntities( final Element rootElement, final String classElementName,
+                                            final Map<String, Element> eventMap,
+                                            final Map<String, Element> eventPropertiesMap ) {
+      final List<Element> mentions = getChildren( rootElement, classElementName );
+      final Map<String, Entity> entitiesById = new HashMap<>();
+      final String docText = getDocumentText();
+      for ( Element mention : mentions ) {
+         final String mentionId = getElementId( mention );
+         final TextSpan span = createEntityTextSpan( mention );
+         if ( span.equals( BAD_TEXT_SPAN ) ) {
+            continue;
+         }
+         final String coveredText = getSpannedText( span, docText );
+         if ( coveredText.trim().isEmpty() ) {
+            continue;
+         }
+         final ClassType mentionType = getEntityClassType( classElementName );
+         final Collection<org.chboston.cnlp.nlp.annotation.attribute.Attribute> attributes
+               = createNlpAttributes( mention );
+         final boolean isEvent = classElementName.equals( EVENT_MENTION );
+         if ( isEvent ) {
+            final String eventId = mention.getAttributeValue( EVENT_ID_KEY );
+            attributes.addAll( parseEventAttributes( rootElement, eventId, eventMap, eventPropertiesMap ) );
+         } else if ( classElementName.equals( TIME_MENTION ) || classElementName.equals( DATE_ANNOTATION ) ) {
+            attributes.add( new DefaultAttribute( "XMI_TIMEX", "DATE" ) );
+         } else {
+            final String conceptIdList = mention.getAttributeValue( CONCEPT_ARRAY );
+            if ( conceptIdList != null ) {
+               for ( String conceptId : conceptIdList.split( "\\s+" ) ) {
+                  attributes.addAll( parseUmlsAttributes( rootElement, conceptId ) );
+               }
+            }
+         }
+         attributes.add( new DefaultAttribute( DefinedAttributeType.UNIQUE_ID, mentionId ) );
+         final org.chboston.cnlp.nlp.annotation.attribute.Attribute[] attributeArray
+               = attributes.toArray( new org.chboston.cnlp.nlp.annotation.attribute.Attribute[ attributes.size() ] );
+         entitiesById.put( mentionId, new DefaultEntity( span, coveredText, mentionType, attributeArray ) );
+      }
+      return entitiesById;
+   }
+
+
+   /**
+    * Converts every xml attribute of the element, except its id, into an nlp attribute.
+    *
+    * @param element xml element with attributes of interest
+    * @return modifiable collection of nlp attributes, one per non-id xml attribute
+    */
+   static private Collection<org.chboston.cnlp.nlp.annotation.attribute.Attribute> createNlpAttributes(
+         final Element element ) {
+      final Collection<Attribute> xmlAttributes = getNonIdXmlAttributes( element );
+      final Collection<org.chboston.cnlp.nlp.annotation.attribute.Attribute> nlpAttributes
+            = new ArrayList<>( xmlAttributes.size() );
+      for ( Attribute source : xmlAttributes ) {
+         final String sourceName = source.getName();
+         final String sourceValue = source.getValue();
+         nlpAttributes.add( createNlpAttribute( sourceName, sourceValue ) );
+      }
+      return nlpAttributes;
+   }
+
+   /**
+    * Creates an nlp attribute for a name and value pair.
+    * A polarity attribute with value "-1" becomes a polarity attribute with value "NEG";
+    * anything else keeps its value and takes the name of the attribute type resolved for the given name.
+    *
+    * @param name  xml attribute name
+    * @param value xml attribute value
+    * @return the corresponding nlp attribute
+    */
+   static private org.chboston.cnlp.nlp.annotation.attribute.Attribute createNlpAttribute( final String name,
+                                                                                           final String value ) {
+      final AttributeType type = AttributeTypeFactory.getAttributeForName( name );
+      final boolean isNegativePolarity = type == DefinedAttributeType.POLARITY && value.equals( "-1" );
+      if ( !isNegativePolarity ) {
+         return new DefaultAttribute( type.getName(), value );
+      }
+      return new DefaultAttribute( DefinedAttributeType.POLARITY, "NEG" );
+   }
+
+
+   /**
+    * Translates the name of a mention element into a class type.
+    *
+    * @param classTypeName name of the xml mention element
+    * @return the matching temporal or semantic class type, or a custom class type carrying the given
+    * name when there is no predefined match
+    */
+   static private ClassType getEntityClassType( final String classTypeName ) {
+      // TODO - add more class types.  See Knowtator xml parser for possible list
+      switch ( classTypeName ) {
+         case EVENT_MENTION:
+            return TemporalClassType.EVENT;
+         case TIME_MENTION:
+         case DATE_ANNOTATION:
+            // times and dates share a single class type
+            return TemporalClassType.TIMEX;
+         case SIGN_SYMPTOM:
+            return SemanticClassType.SIGN_OR_SYMPTOM;
+         case DISEASE:
+            return SemanticClassType.DISEASE_DISORDER;
+         case ANATOMIC_SITE:
+            return SemanticClassType.ANATOMICAL_SITE;
+         case MEDICATION:
+            return SemanticClassType.MEDICATION;
+         case PROCEDURE:
+            return SemanticClassType.PROCEDURE;
+         case "generic_class":
+            return SemanticClassType.MISC;
+         default:
+            return new CustomClassType( classTypeName );
+      }
+   }
+
+
+   /**
+    * Follows the chain event id -> Event element -> properties id -> EventProperties element and
+    * converts the attributes of that properties element into nlp attributes.
+    *
+    * @param rootElement        the root xml element in an annotation xml file; not used
+    * @param eventId            id of the Event element referenced by an event mention
+    * @param eventMap           map of element ids and Event elements
+    * @param eventPropertiesMap map of element ids and EventProperties elements
+    * @return nlp attributes of the event properties, or an empty list if any link of the chain is missing
+    */
+   static private Collection<org.chboston.cnlp.nlp.annotation.attribute.Attribute> parseEventAttributes(
+         final Element rootElement, final String eventId,
+         final Map<String, Element> eventMap, final Map<String, Element> eventPropertiesMap ) {
+      final Element event = eventMap.get( eventId );
+      String propertiesId = null;
+      if ( event != null ) {
+         propertiesId = event.getAttributeValue( PROPERTIES_ID_KEY );
+      }
+      Element properties = null;
+      if ( propertiesId != null ) {
+         properties = eventPropertiesMap.get( propertiesId );
+      }
+      if ( properties != null ) {
+         return createNlpAttributes( properties );
+      }
+      return Collections.emptyList();
+   }
+
+   /**
+    * Converts the attributes of the concept element with the given id into nlp attributes.
+    *
+    * @param rootElement the root xml element in an annotation xml file
+    * @param conceptId   id of the wanted concept element
+    * @return nlp attributes of the concept, or an empty list if there is no concept element with that id
+    */
+   static private Collection<org.chboston.cnlp.nlp.annotation.attribute.Attribute> parseUmlsAttributes(
+         final Element rootElement, final String conceptId ) {
+      final Element concept = getIdElement( rootElement, UMLS_CONCEPT, conceptId );
+      if ( concept != null ) {
+         return createNlpAttributes( concept );
+      }
+      return Collections.emptyList();
+   }
+
+
+   /**
+    * Searches the child elements with the given name for the first one with the given id.
+    *
+    * @param rootElement     the root xml element in an annotation xml file
+    * @param elementTypeName name of the child elements to search
+    * @param elementId       id of the wanted element
+    * @return the first matching element, or null if there is none
+    */
+   static private Element getIdElement( final Element rootElement, final String elementTypeName,
+                                        final String elementId ) {
+      for ( Element candidate : getChildren( rootElement, elementTypeName ) ) {
+         final String candidateId = getElementId( candidate );
+         if ( candidateId.equals( elementId ) ) {
+            return candidate;
+         }
+      }
+      return null;
+   }
+
+   /**
+    * Reads the id of an element: the value of its first attribute that is named "id" and whose
+    * value is not "0".
+    *
+    * @param element xml element
+    * @return the id value, or an empty String if the element has no such attribute
+    */
+   static private String getElementId( final Element element ) {
+      final List<Attribute> attributes = element.getAttributes();
+      if ( attributes == null ) {
+         return "";
+      }
+      for ( Attribute attribute : attributes ) {
+         if ( !attribute.getName().equals( ID ) ) {
+            continue;
+         }
+         final String idValue = attribute.getValue();
+         if ( !idValue.equals( "0" ) ) {
+            return idValue;
+         }
+      }
+      return "";
+   }
+
+   /**
+    * Collects every xml attribute of the element whose name is not "id".
+    *
+    * @param element xml element
+    * @return modifiable collection of the non-id xml attributes, in document order; possibly empty
+    */
+   static private Collection<Attribute> getNonIdXmlAttributes( final Element element ) {
+      final List<Attribute> xmiAttributes = element.getAttributes();
+      // Sized by the full attribute count: "size() - 1" is negative for an element without
+      // attributes, which makes the ArrayList constructor throw an IllegalArgumentException
+      final Collection<Attribute> nonIdAttributes = new ArrayList<>( xmiAttributes.size() );
+      for ( Attribute xmiAttribute : xmiAttributes ) {
+         final String attributeName = xmiAttribute.getName();
+         if ( !attributeName.equals( ID ) ) {
+            nonIdAttributes.add( xmiAttribute );
+         }
+      }
+      return nonIdAttributes;
+   }
+
+   /**
+    * Creates a temporal link Relation for each {@code TemporalTextRelation} element.
+    * The arg1 / arg2 attributes of an element name relation arguments, which are resolved to entity
+    * ids and then to Entities.  The category attribute becomes the relation type attribute, and all
+    * remaining non-id attributes are copied over.  Elements that cannot be resolved are logged and skipped.
+    *
+    * @param rootElement    xml root element
+    * @param entityMap      map of elementIDs and Entities
+    * @param relationArgMap map of argument elementIDs and entity elementIDs
+    * @return list of Relations created with all the given information
+    */
+   static private List<Relation> getTLinks( final Element rootElement,
+                                            final Map<String, Entity> entityMap,
+                                            final Map<String, String> relationArgMap ) {
+      if ( entityMap.isEmpty() ) {
+         return Collections.emptyList();
+      }
+      final List<Relation> tlinks = new ArrayList<>();
+      for ( Element tlinkElement : getChildren( rootElement, TLINK ) ) {
+         final List<org.chboston.cnlp.nlp.annotation.attribute.Attribute> tlinkAttributes = new ArrayList<>();
+         final String tlinkId = getElementId( tlinkElement );
+         // TODO make classtype tlink
+         String arg1Id = "";
+         String arg2Id = "";
+         for ( Attribute xmlAttribute : getNonIdXmlAttributes( tlinkElement ) ) {
+            final String key = xmlAttribute.getName();
+            final String val = xmlAttribute.getValue();
+            if ( key.equalsIgnoreCase( LINK_ARG1 ) ) {
+               arg1Id = val;
+               continue;
+            }
+            if ( key.equalsIgnoreCase( LINK_ARG2 ) ) {
+               arg2Id = val;
+               continue;
+            }
+            // the link category is stored under the relation type name, everything else under its own name
+            final String nlpName = key.equalsIgnoreCase( LINK_TYPE )
+                                   ? DefinedAttributeType.RELATION_TYPE.getName()
+                                   : key;
+            tlinkAttributes.add( createNlpAttribute( nlpName, val ) );
+         }
+         if ( arg1Id.isEmpty() || arg2Id.isEmpty() ) {
+            LOGGER.severe( "Relation " + tlinkId
+                           + " has no Source " + arg1Id
+                           + " and/or no Target " + arg2Id );
+            continue;
+         }
+         final String sourceId = relationArgMap.get( arg1Id );
+         final String targetId = relationArgMap.get( arg2Id );
+         if ( sourceId == null || targetId == null ) {
+            LOGGER.severe( "Relation " + tlinkId
+                           + " has no Source " + arg1Id
+                           + " and/or no Target " + arg2Id );
+            continue;
+         }
+
+         final Entity source = entityMap.get( sourceId );
+         final Entity target = entityMap.get( targetId );
+         if ( source == null || target == null ) {
+            LOGGER.severe( "Relation " + tlinkId
+                           + " Source " + sourceId
+                           + " and/or Target " + targetId + " does not exist" );
+            continue;
+         }
+         tlinkAttributes.add( new DefaultAttribute( DefinedAttributeType.UNIQUE_ID, tlinkId ) );
+         final org.chboston.cnlp.nlp.annotation.attribute.Attribute[] attributeArray
+               = tlinkAttributes.toArray(
+               new org.chboston.cnlp.nlp.annotation.attribute.Attribute[ tlinkAttributes.size() ] );
+         tlinks.add( new DefaultRelation( source, target, TemporalClassType.TLINK, attributeArray ) );
+      }
+      return tlinks;
+   }
+
+
+   /**
+    * Creates a pair of text spans for each {@code CoreferenceRelation} element.
+    * The arg1 / arg2 attributes of an element name relation arguments, which are resolved to entity
+    * ids and then to Entities.  Elements that cannot be resolved are logged and skipped.
+    *
+    * @param rootElement    xml root element
+    * @param entityMap      map of elementIDs and Entities
+    * @param relationArgMap map of argument elementIDs and entity elementIDs
+    * @return list of TextSpan pairs for Coreference Relations created with all the given information
+    */
+   static private List<Collection<TextSpan>> getCorefLinks( final Element rootElement,
+                                                            final Map<String, Entity> entityMap,
+                                                            final Map<String, String> relationArgMap ) {
+      if ( entityMap.isEmpty() ) {
+         return Collections.emptyList();
+      }
+      final List<Collection<TextSpan>> spanPairs = new ArrayList<>();
+      for ( Element corefElement : getChildren( rootElement, COREF_RELATION ) ) {
+         final String corefId = getElementId( corefElement );
+         String arg1Id = "";
+         String arg2Id = "";
+         for ( Attribute xmlAttribute : getNonIdXmlAttributes( corefElement ) ) {
+            final String key = xmlAttribute.getName();
+            final String val = xmlAttribute.getValue();
+            if ( key.equalsIgnoreCase( LINK_ARG1 ) ) {
+               arg1Id = val;
+            } else if ( key.equalsIgnoreCase( LINK_ARG2 ) ) {
+               arg2Id = val;
+            }
+         }
+         if ( arg1Id.isEmpty() || arg2Id.isEmpty() ) {
+            LOGGER.severe( "Relation " + corefId
+                           + " has no Source " + arg1Id
+                           + " and/or no Target " + arg2Id );
+            continue;
+         }
+         final String sourceId = relationArgMap.get( arg1Id );
+         final String targetId = relationArgMap.get( arg2Id );
+         if ( sourceId == null || targetId == null ) {
+            LOGGER.severe( "Relation " + corefId
+                           + " has no Source " + arg1Id
+                           + " and/or no Target " + arg2Id );
+            continue;
+         }
+         final Entity source = entityMap.get( sourceId );
+         final Entity target = entityMap.get( targetId );
+         if ( source == null || target == null ) {
+            LOGGER.severe( "Relation " + corefId
+                           + " Source " + sourceId
+                           + " and/or Target " + targetId + " does not exist" );
+            continue;
+         }
+         final Collection<TextSpan> pair = new ArrayList<>( 2 );
+         pair.add( source.getTextSpan() );
+         pair.add( target.getTextSpan() );
+         spanPairs.add( pair );
+      }
+      return Collections.unmodifiableList( spanPairs );
+   }
+
+
+   /**
+    * Creates a collection of text spans for each {@code CollectionTextRelation} element.
+    * The whitespace-separated members attribute of an element lists relation arguments, which are
+    * resolved to entity ids and then to Entities.  Members that cannot be resolved are logged and
+    * left out; a chain is kept only if more than one text span remains.
+    *
+    * @param rootElement    xml root element
+    * @param entityMap      map of elementIDs and Entities
+    * @param relationArgMap map of argument elementIDs and entity elementIDs
+    * @return list of TextSpan collections, one per usable coreference chain
+    */
+   static private List<Collection<TextSpan>> getCorefChains( final Element rootElement,
+                                                             final Map<String, Entity> entityMap,
+                                                             final Map<String, String> relationArgMap ) {
+      if ( entityMap.isEmpty() ) {
+         return Collections.emptyList();
+      }
+      final Pattern whitespace = Pattern.compile( "\\s+" );
+      final List<Collection<TextSpan>> chains = new ArrayList<>();
+      for ( Element chainElement : getChildren( rootElement, COREF_CHAIN ) ) {
+         final String chainId = getElementId( chainElement );
+         String[] memberArgIds = null;
+         for ( Attribute xmlAttribute : getNonIdXmlAttributes( chainElement ) ) {
+            final String key = xmlAttribute.getName();
+            final String val = xmlAttribute.getValue();
+            if ( key.equalsIgnoreCase( COREF_MEMBERS ) ) {
+               memberArgIds = whitespace.split( val );
+               break;
+            }
+         }
+         if ( memberArgIds == null || memberArgIds.length == 0 ) {
+            LOGGER.severe( "Relation " + chainId + " has no Members" );
+            continue;
+         }
+         if ( memberArgIds.length == 1 ) {
+            LOGGER.severe( "Relation " + chainId + " has only one Member " + memberArgIds[ 0 ] );
+            continue;
+         }
+         // First pass: relation argument ids -> entity ids
+         final Collection<String> memberEntityIds = new ArrayList<>();
+         for ( String memberArgId : memberArgIds ) {
+            final String memberEntityId = relationArgMap.get( memberArgId );
+            if ( memberEntityId != null ) {
+               memberEntityIds.add( memberEntityId );
+            } else {
+               LOGGER.severe( "Relation " + chainId
+                              + " has no Source " + memberArgId );
+            }
+         }
+         // Second pass: entity ids -> text spans
+         final Collection<TextSpan> memberSpans = new ArrayList<>( 2 );
+         for ( String memberEntityId : memberEntityIds ) {
+            final Entity member = entityMap.get( memberEntityId );
+            if ( member != null ) {
+               memberSpans.add( member.getTextSpan() );
+            } else {
+               LOGGER.severe( "Relation " + chainId + " Entity ID " + memberEntityId + " does not exist" );
+            }
+         }
+         if ( memberSpans.size() > 1 ) {
+            chains.add( memberSpans );
+         }
+      }
+      return Collections.unmodifiableList( chains );
+   }
+
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   protected TextSpan createEntityTextSpan( final Element spanElement ) {
+      int begin = 0;
+      int end = 0;
+      try {
+         begin = spanElement.getAttribute( BEGIN ).getIntValue();
+         end = spanElement.getAttribute( END ).getIntValue();
+      } catch ( JDOMException jdomE ) {
+         LOGGER.severe( jdomE.getMessage() );
+         return BAD_TEXT_SPAN;
+      }
+      return new DefaultTextSpan( begin, end );
+   }
+
+   /**
+    * Gets the text covered by a text span.
+    * If the document text is known and the span lies within it then this simply returns a substring,
+    * otherwise a string of 'A'.
+    *
+    * @param textSpan     span with start index (inclusive) and end index (exclusive, as used by substring)
+    * @param documentText full text of the document; may be null or empty
+    * @return The spanned text within provided document text, or a String filled with character 'A'
+    */
+   static private String getSpannedText( final TextSpan textSpan, final String documentText ) {
+      if ( documentText == null || documentText.isEmpty() ) {
+         return fakeSomeText( textSpan );
+      }
+      final int startIndex = textSpan.getStartIndex();
+      final int endIndex = textSpan.getEndIndex();
+      // The end index is exclusive, so a span that ends exactly at the end of the document is valid
+      if ( startIndex >= 0 && endIndex <= documentText.length() ) {
+         return documentText.substring( startIndex, endIndex );
+      }
+      return fakeSomeText( textSpan );
+   }
+
+   /**
+    * Anafora XML does not provide actual text, so we need to fake it.
+    * This will knock some of the IAA capabilities, such as Alpha computations based upon word count,
+    * marked comparison by word count, etc.
+    *
+    * @param textSpan -
+    * @return A String the length of the textSpan filled with the character 'A'
+    */
+   static private String fakeSomeText( final TextSpan textSpan ) {
+      if ( textSpan instanceof DiscontiguousTextSpan ) {
+         final TextSpan jointTextSpan = new DefaultTextSpan( textSpan.getStartIndex(), textSpan.getEndIndex() );
+         return fakeSomeText( jointTextSpan );
+      }
+      final char[] chars = new char[ textSpan.getLength() ];
+      Arrays.fill( chars, 'A' );
+      return String.valueOf( chars );
+   }
+
+
+   /**
+    * Collects the direct child elements of an element that have the given name.
+    *
+    * @param rootElement element whose direct children are inspected
+    * @param name        element name to match exactly
+    * @return a new list of the matching children in their original order; empty if none match
+    */
+   static private List<Element> getChildren( final Element rootElement, final String name ) {
+      final List<?> candidates = rootElement.getChildren();
+      final List<Element> namedChildren = new ArrayList<>( candidates.size() );
+      for ( Object candidate : candidates ) {
+         if ( !(candidate instanceof Element) ) {
+            continue;
+         }
+         final Element element = (Element)candidate;
+         if ( element.getName().equals( name ) ) {
+            namedChildren.add( element );
+         }
+      }
+      return namedChildren;
+   }
+
+   /**
+    * Finds the first direct child element with the given name.
+    *
+    * @param rootElement element whose direct children are inspected
+    * @param name        element name to match exactly
+    * @return the first matching child, or null if no child has that name
+    */
+   static private Element getChild( final Element rootElement, final String name ) {
+      final List<Element> matches = getChildren( rootElement, name );
+      return matches.isEmpty() ? null : matches.get( 0 );
+   }
+
+
+   /**
+    * Debugging aid: parses an xml file and prints the name of each direct child element of the root,
+    * followed by a tab-indented "name = value" line for each of that child's attributes.
+    * Children that are not elements are logged as warnings.
+    * Afterwards getDocumentText is invoked on the root element; any value it returns is ignored.
+    * Parse and IO failures are logged, not thrown.
+    *
+    * @param filePath location of the xml file, handed directly to the SAXBuilder
+    */
+   static private void testParse( final String filePath ) {
+      final SAXBuilder parser = new SAXBuilder();
+      try {
+         final Element rootElement = parser.build( filePath ).getRootElement();
+         for ( Object child : rootElement.getChildren() ) {
+            if ( !(child instanceof Element) ) {
+               LOGGER.warning( "   NOT ELEMENT " + child.toString() );
+               continue;
+            }
+            final Element element = (Element)child;
+            System.out.println( element.getName() );
+            for ( Object item : element.getAttributes() ) {
+               if ( item instanceof Attribute ) {
+                  final Attribute attribute = (Attribute)item;
+                  System.out.println( "\t" + attribute.getName() + " = " + attribute.getValue() );
+               }
+            }
+         }
+         getDocumentText( rootElement );
+      } catch ( JDOMException | IOException multE ) {
+         LOGGER.severe( multE.getMessage() );
+      }
+   }
+
+
+   /**
+    * Debugging entry point: runs testParse on the first file in INPUT_DIR_PATH
+    * whose name ends with "_report_4.txt.xmi.old", then stops.
+    * Exits after logging a warning if the directory cannot be listed.
+    *
+    * @param args unused
+    */
+   public static void main( String[] args ) {
+      final File inputDir = new File( INPUT_DIR_PATH );
+      final File[] candidates = inputDir.listFiles();
+      if ( candidates == null ) {
+         LOGGER.warning( "No files in " + inputDir.getPath() );
+         System.exit( 0 );
+      }
+      // An earlier, now disabled, filter accepted any file name ending in ".old"
+      for ( File candidate : candidates ) {
+         if ( candidate.getName().endsWith( "_report_4.txt.xmi.old" ) ) {
+            testParse( candidate.getPath() );
+            break;
+         }
+      }
+   }
+
+}

Added: ctakes/sandbox/timelanes/org/chboston/cnlp/xmi/parser/XmiEntitySegregator.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/timelanes/org/chboston/cnlp/xmi/parser/XmiEntitySegregator.java?rev=1660963&view=auto
==============================================================================
--- ctakes/sandbox/timelanes/org/chboston/cnlp/xmi/parser/XmiEntitySegregator.java (added)
+++ ctakes/sandbox/timelanes/org/chboston/cnlp/xmi/parser/XmiEntitySegregator.java Thu Feb 19 18:06:13 2015
@@ -0,0 +1,74 @@
+package org.chboston.cnlp.xmi.parser;
+
+import org.chboston.cnlp.nlp.annotation.annotation.AnnotationSpanComparator;
+import org.chboston.cnlp.nlp.annotation.classtype.ClassType;
+import org.chboston.cnlp.nlp.annotation.classtype.TemporalClassType;
+import org.chboston.cnlp.nlp.annotation.entity.Entity;
+import org.chboston.cnlp.nlp.annotation.textspan.TextSpan;
+
+import java.util.*;
+
+/**
+ * Static utilities that split a collection of entities into named entities, events and times
+ * according to each entity's class type.
+ * Every returned list is sorted with the {@link AnnotationSpanComparator} and is unmodifiable.
+ * <p>
+ * Author: SPF
+ * Affiliation: CHIP-NLP
+ * Date: 3/28/13
+ */
+final public class XmiEntitySegregator {
+
+   /**
+    * Not instantiable; this class only offers static methods.
+    */
+   private XmiEntitySegregator() {
+   }
+
+   /**
+    * @param entities collection of entities
+    * @return all entities within the given collection whose class type is neither EVENT nor TIMEX
+    */
+   static public List<Entity> getNamedEntities( final Iterable<Entity> entities ) {
+      final List<Entity> named = new ArrayList<>();
+      for ( Entity candidate : entities ) {
+         final ClassType classType = candidate.getClassType();
+         final boolean isTemporal = classType == TemporalClassType.EVENT
+                                    || classType == TemporalClassType.TIMEX;
+         if ( !isTemporal ) {
+            named.add( candidate );
+         }
+      }
+      return sortAndSeal( named );
+   }
+
+   /**
+    * @param entities collection of entities
+    * @return all entities within the given collection whose class type is EVENT
+    */
+   static public List<Entity> getEvents( final Iterable<Entity> entities ) {
+      final List<Entity> events = new ArrayList<>();
+      for ( Entity candidate : entities ) {
+         if ( candidate.getClassType() != TemporalClassType.EVENT ) {
+            continue;
+         }
+         events.add( candidate );
+      }
+      return sortAndSeal( events );
+   }
+
+   /**
+    * XMI has Date, Time, and Timex3 annotations.  Many will overlap.  We only want one per textSpan.
+    * Entities are keyed by their text span in a map, so when several TIMEX entities have text spans
+    * that the map treats as the same key, only the one encountered last is kept.
+    *
+    * @param entities collection of entities
+    * @return one TIMEX entity per text span within the given collection of entities
+    */
+   static public List<Entity> getTimes( final Iterable<Entity> entities ) {
+      final Map<TextSpan, Entity> timeBySpan = new HashMap<>();
+      for ( Entity candidate : entities ) {
+         if ( candidate.getClassType() == TemporalClassType.TIMEX ) {
+            timeBySpan.put( candidate.getTextSpan(), candidate );
+         }
+      }
+      return sortAndSeal( new ArrayList<Entity>( timeBySpan.values() ) );
+   }
+
+   /**
+    * Sorts the given list in place with the {@link AnnotationSpanComparator}.
+    *
+    * @param entityList mutable list of entities; it is reordered by this call
+    * @return an unmodifiable view of the sorted list
+    */
+   static private List<Entity> sortAndSeal( final List<Entity> entityList ) {
+      Collections.sort( entityList, AnnotationSpanComparator.getInstance() );
+      return Collections.unmodifiableList( entityList );
+   }
+
+}



Mime
View raw message