ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From seanfi...@apache.org
Subject svn commit: r1689929 - in /ctakes/trunk/ctakes-core: desc/cas_consumer/ src/main/java/org/apache/ctakes/core/cc/pretty/ src/main/java/org/apache/ctakes/core/cc/pretty/cell/ src/main/java/org/apache/ctakes/core/cc/pretty/html/ src/main/java/org/apache/c...
Date Wed, 08 Jul 2015 18:16:46 GMT
Author: seanfinan
Date: Wed Jul  8 18:16:46 2015
New Revision: 1689929

URL: http://svn.apache.org/r1689929
Log:
CTAKES-366  Adding PrettyTextWriter to core.  Writes output to file, sentence by sentence, with pos, semantic types and umls cuis marked by text span

Added:
    ctakes/trunk/ctakes-core/desc/cas_consumer/PrettyTextWriter.xml
    ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/
    ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/SemanticGroup.java
    ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/cell/
    ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/cell/AbstractItemCell.java
    ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/cell/BaseItemCell.java
    ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/cell/DefaultBaseItemCell.java
    ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/cell/DefaultUmlsItemCell.java
    ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/cell/ItemCell.java
    ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/cell/UmlsItemCell.java
    ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/html/
    ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/plaintext/
    ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/plaintext/PrettyTextWriter.java
    ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/plaintext/PrettyTextWriterFit.java
    ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/plaintext/PrettyTextWriterUima.java
    ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/row/
    ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/row/DefaultItemRow.java
    ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/row/ItemRow.java
    ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/textspan/
    ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/textspan/DefaultTextSpan.java
    ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/textspan/TextSpan.java

Added: ctakes/trunk/ctakes-core/desc/cas_consumer/PrettyTextWriter.xml
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/desc/cas_consumer/PrettyTextWriter.xml?rev=1689929&view=auto
==============================================================================
--- ctakes/trunk/ctakes-core/desc/cas_consumer/PrettyTextWriter.xml (added)
+++ ctakes/trunk/ctakes-core/desc/cas_consumer/PrettyTextWriter.xml Wed Jul  8 18:16:46 2015
@@ -0,0 +1,67 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+
+    Licensed to the Apache Software Foundation (ASF) under one
+    or more contributor license agreements.  See the NOTICE file
+    distributed with this work for additional information
+    regarding copyright ownership.  The ASF licenses this file
+    to you under the Apache License, Version 2.0 (the
+    "License"); you may not use this file except in compliance
+    with the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing,
+    software distributed under the License is distributed on an
+    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+    KIND, either express or implied.  See the License for the
+    specific language governing permissions and limitations
+    under the License.
+
+-->
+<casConsumerDescription xmlns="http://uima.apache.org/resourceSpecifier">
+   <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+   <implementationName>org.apache.ctakes.core.cc.pretty.plaintext.PrettyTextWriterUima</implementationName>
+   <processingResourceMetaData>
+      <name>PrettyTextWriter</name>
+      <description>Cas Consumer that writes pretty lines to file(s) or console minimum identified annotation.
+         Multiple rows are written per sentence, with marks for pos and umls entities.
+      </description>
+      <version>1.0</version>
+      <vendor>Apache Software Foundation</vendor>
+      <configurationParameters>
+         <configurationParameter>
+            <name>OutputDirectory</name>
+            <description>Directory to which files should be saved</description>
+            <type>String</type>
+            <multiValued>false</multiValued>
+            <mandatory>false</mandatory>
+         </configurationParameter>
+      </configurationParameters>
+      <configurationParameterSettings>
+         <nameValuePair>
+            <name>OutputDirectory</name>
+            <value>
+               <!--  CHANGE ME  -->
+               <string>CHANGE ME</string>
+            </value>
+         </nameValuePair>
+      </configurationParameterSettings>
+      <typeSystemDescription/>
+      <typePriorities/>
+      <fsIndexCollection/>
+      <capabilities/>
+      <operationalProperties>
+         <modifiesCas>false</modifiesCas>
+         <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
+      </operationalProperties>
+   </processingResourceMetaData>
+   <externalResourceDependencies>
+   </externalResourceDependencies>
+   <resourceManagerConfiguration>
+      <externalResources>
+      </externalResources>
+      <externalResourceBindings>
+      </externalResourceBindings>
+   </resourceManagerConfiguration>
+</casConsumerDescription>

Added: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/SemanticGroup.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/SemanticGroup.java?rev=1689929&view=auto
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/SemanticGroup.java (added)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/SemanticGroup.java Wed Jul  8 18:16:46 2015
@@ -0,0 +1,60 @@
+package org.apache.ctakes.core.cc.pretty;
+
+import java.util.Arrays;
+import java.util.Collection;
+
+/**
+ * Enumeration of ctakes semantic groups, each with a short display name and the tuis assigned to it:
+ * anatomical site, disease/disorder, finding (sign/symptom), test/procedure, and medication
+ */
+public enum SemanticGroup {
+   // A similar enumeration is in org.apache.ctakes.dictionary.lookup2.util.SemanticUtil
+   // and should be moved to core if this new class is taken up
+   // cTakes types: display name followed by the tuis that belong to the group
+   ANATOMICAL_SITE( "Anatomy", "T021", "T022", "T023", "T024", "T025", "T026", "T029", "T030" ),
+   DISORDER( "Disorder", "T019", "T020", "T037", "T047", "T048", "T049", "T050", "T190", "T191" ),
+   FINDING( "Finding", "T033", "T034", "T040", "T041", "T042", "T043", "T044", "T045", "T046",
+         "T056", "T057", "T184" ),
+   PROCEDURE( "Procedure", "T059", "T060", "T061" ),
+   MEDICATION( "Drug", "T109", "T110", "T114", "T115", "T116", "T118", "T119",
+         "T121", "T122", "T123", "T124", "T125", "T126", "T127",
+         "T129", "T130", "T131", "T195", "T196", "T197", "T200", "T203" );
+   static private final String UNKNOWN_SEMANTIC = "Unknown"; // name returned when no group lists the tui
+   final private String _name;
+   final private Collection<String> _tuis;
+
+   /**
+    * ctakes semantic type defined by tuis
+    *
+    * @param name short name of the type: Anatomy, Disorder, Finding, Procedure, Drug
+    * @param tuis tuis that define the semantic type
+    */
+   private SemanticGroup( final String name, final String... tuis ) {
+      _name = name;
+      _tuis = Arrays.asList( tuis );
+   }
+
+   /**
+    * @return name of this semantic type
+    */
+   public String getName() {
+      return _name;
+   }
+
+   /**
+    * @param tui a tui of interest
+    * @return the name of a Semantic type associated with the tui, "Unknown" if the tui is null, empty or unlisted
+    */
+   static public String getSemanticName( final String tui ) {
+      if ( tui == null || tui.isEmpty() ) {
+         return UNKNOWN_SEMANTIC;
+      }
+      for ( SemanticGroup semanticGroup : SemanticGroup.values() ) { // groups are checked in declaration order
+         if ( semanticGroup._tuis.contains( tui ) ) {
+            return semanticGroup._name;
+         }
+      }
+      return UNKNOWN_SEMANTIC;
+   }
+
+}

Added: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/cell/AbstractItemCell.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/cell/AbstractItemCell.java?rev=1689929&view=auto
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/cell/AbstractItemCell.java (added)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/cell/AbstractItemCell.java Wed Jul  8 18:16:46 2015
@@ -0,0 +1,24 @@
+package org.apache.ctakes.core.cc.pretty.cell;
+
+import org.apache.ctakes.core.cc.pretty.textspan.TextSpan;
+
+/**
+ * Base class for item cells; holds the text span of the cell and returns it from {@link #getTextSpan()}
+ */
+public abstract class AbstractItemCell implements ItemCell {
+
+   final private TextSpan _textSpan;
+
+   public AbstractItemCell( final TextSpan textSpan ) { // the span is stored as given, without a null check
+      _textSpan = textSpan;
+   }
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public TextSpan getTextSpan() {
+      return _textSpan;
+   }
+
+}

Added: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/cell/BaseItemCell.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/cell/BaseItemCell.java?rev=1689929&view=auto
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/cell/BaseItemCell.java (added)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/cell/BaseItemCell.java Wed Jul  8 18:16:46 2015
@@ -0,0 +1,13 @@
+package org.apache.ctakes.core.cc.pretty.cell;
+
+/**
+ * Item cell for most basic item: document text and its part of speech
+ */
+public interface BaseItemCell extends ItemCell {
+
+   /**
+    * @return part of speech for the item in this cell; {@link DefaultBaseItemCell} returns "" when it was given none
+    */
+   String getPos();
+
+}

Added: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/cell/DefaultBaseItemCell.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/cell/DefaultBaseItemCell.java?rev=1689929&view=auto
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/cell/DefaultBaseItemCell.java (added)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/cell/DefaultBaseItemCell.java Wed Jul  8 18:16:46 2015
@@ -0,0 +1,73 @@
+package org.apache.ctakes.core.cc.pretty.cell;
+
+import org.apache.ctakes.core.cc.pretty.textspan.TextSpan;
+
+/** Base item cell that stores the document text of an item and its part of speech, one printed line for each.
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 7/6/2015
+ */
+public final class DefaultBaseItemCell extends AbstractItemCell implements BaseItemCell {
+
+   final private String _text;
+   final private String _pos;
+
+   public DefaultBaseItemCell( final TextSpan textSpan, final String text, final String pos ) {
+      super( textSpan );
+      _text = text; // stored as given, null is not replaced
+      _pos = pos == null ? "" : pos; // a missing pos becomes "" so that getWidth() can call length() on it
+   }
+
+   /**
+    * {@inheritDoc}
+    *
+    * @return the larger of the text span width and the pos length
+    */
+   @Override
+   public int getWidth() {
+      return Math.max( getTextSpan().getWidth(), _pos.length() );
+   }
+
+   /**
+    * {@inheritDoc}
+    *
+    * @return 2.  One line each for text and pos
+    */
+   @Override
+   public int getHeight() {
+      return 2;
+   }
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public String getText() {
+      return _text;
+   }
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public String getPos() {
+      return _pos;
+   }
+
+   /**
+    * {@inheritDoc}
+    *
+    * @return the text from the document for line index 0, the part of speech for line index 1, "" for any other index
+    */
+   @Override
+   public String getLineText( final int lineIndex ) {
+      switch ( lineIndex ) {
+         case 0:
+            return _text;
+         case 1:
+            return _pos;
+      }
+      return "";
+   }
+
+}

Added: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/cell/DefaultUmlsItemCell.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/cell/DefaultUmlsItemCell.java?rev=1689929&view=auto
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/cell/DefaultUmlsItemCell.java (added)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/cell/DefaultUmlsItemCell.java Wed Jul  8 18:16:46 2015
@@ -0,0 +1,123 @@
+package org.apache.ctakes.core.cc.pretty.cell;
+
+import org.apache.ctakes.core.cc.pretty.SemanticGroup;
+import org.apache.ctakes.core.cc.pretty.textspan.TextSpan;
+
+import java.util.*;
+
+/**
+ * Item Cell that holds information about an umls entity: its semantic type names, their cuis and negation
+ */
+public final class DefaultUmlsItemCell extends AbstractItemCell implements UmlsItemCell {
+
+   static private final int CUI_SPAN = 8; // minimum cell width, the length of a cui
+   static private final String NEGATED_TEXT = "Negated";
+
+
+   final private int _semanticWidth;
+   final private int _height;
+   final private boolean _negated;
+   final private List<String> _semanticTextLines;
+
+   /**
+    * @param semanticCuiNames semantic type names to put in order (the constructor passes the keys of its cui map)
+    * @return a list of semantic type names, sorted in the order of ctakes semantic group names followed by
+    * alphabetical custom names
+    */
+   static private Collection<String> getSortedSemanticNames( final Collection<String> semanticCuiNames ) {
+      final Collection<String> semanticGroupNames = new ArrayList<>();
+      for ( SemanticGroup semanticGroup : SemanticGroup.values() ) {
+         semanticGroupNames.add( semanticGroup.getName() );
+      }
+      final List<String> sortedCuiNames = new ArrayList<>( semanticCuiNames );
+      semanticGroupNames.retainAll( sortedCuiNames ); // keep only the group names that were actually given
+      sortedCuiNames.removeAll( semanticGroupNames ); // what remains are names that are not group names
+      Collections.sort( sortedCuiNames );
+      semanticGroupNames.addAll( sortedCuiNames );
+      return semanticGroupNames;
+   }
+
+
+   public DefaultUmlsItemCell( final TextSpan textSpan, final int polarity,
+                               final Map<String, Collection<String>> semanticCuis ) {
+      super( textSpan );
+      int width = CUI_SPAN; // only semantic names can widen this; NOTE(review): cuis longer than 8 are not measured
+      _negated = polarity < 0; // any negative polarity marks the entity as negated
+      _semanticTextLines = new ArrayList<>();
+      final List<String> sortedCuis = new ArrayList<>(); // scratch list, reused for each semantic name
+      final Collection<String> semanticGroupNames = getSortedSemanticNames( semanticCuis.keySet() );
+      for ( String semanticName : semanticGroupNames ) {
+         final Collection<String> cuis = semanticCuis.get( semanticName );
+         if ( cuis != null ) { // a name mapped to null contributes no lines
+            width = Math.max( width, semanticName.length() );
+            _semanticTextLines.add( semanticName ); // the semantic name line is followed by its sorted cuis
+            sortedCuis.addAll( cuis );
+            Collections.sort( sortedCuis );
+            _semanticTextLines.addAll( sortedCuis );
+            sortedCuis.clear();
+         }
+      }
+      _semanticWidth = width;
+      int height = 1 + _semanticTextLines.size(); // entity fill line + semantic names and cuis
+      if ( _negated ) {
+         height++;  // to print negation
+      }
+      _height = height;
+   }
+
+   /**
+    * {@inheritDoc}
+    *
+    * @return the maximum of the document text length, the semantic type length and the cui length (8)
+    */
+   @Override
+   public int getWidth() {
+      return Math.max( getTextSpan().getWidth(), _semanticWidth );
+   }
+
+   /**
+    * {@inheritDoc}
+    *
+    * @return the 1 for the text span representation line + number of semantic types + cuis + 1 if negated
+    */
+   @Override
+   public int getHeight() {
+      return _height;
+   }
+
+   /**
+    * {@inheritDoc}
+    *
+    * @return {@link UmlsItemCell#ENTITY_FILL}
+    */
+   @Override
+   public String getText() {
+      return ENTITY_FILL;
+   }
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public boolean isNegated() {
+      return _negated;
+   }
+
+   /**
+    * {@inheritDoc}
+    *
+    * @return {@link UmlsItemCell#ENTITY_FILL} for index 0, Semantic types and Cuis for lines after that, then negated
+    */
+   @Override
+   public String getLineText( final int lineIndex ) {
+      if ( lineIndex == 0 ) {
+         return ENTITY_FILL;
+      } else if ( lineIndex > 0 && lineIndex - 1 < _semanticTextLines.size() ) {
+         return _semanticTextLines.get( lineIndex - 1 ); // line 1 maps to the first semantic text line
+      } else if ( isNegated() && lineIndex - 1 == _semanticTextLines.size() ) {
+         return NEGATED_TEXT; // the negation marker is the line directly after the last semantic text line
+      }
+      return ""; // negative indices and indices past the last line
+   }
+
+}

Added: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/cell/ItemCell.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/cell/ItemCell.java?rev=1689929&view=auto
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/cell/ItemCell.java (added)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/cell/ItemCell.java Wed Jul  8 18:16:46 2015
@@ -0,0 +1,36 @@
+package org.apache.ctakes.core.cc.pretty.cell;
+
+import org.apache.ctakes.core.cc.pretty.textspan.TextSpan;
+
+
+/**
+ * Container class for information about an item (text, annotation, etc.) cell - used for calculating print layout.
+ */
+public interface ItemCell {
+
+   /**
+    * @return text span for the original document text in the item cell
+    */
+   TextSpan getTextSpan();
+
+   /**
+    * @return width in characters required to accommodate this item cell
+    */
+   int getWidth();
+
+   /**
+    * @return height in lines required to accommodate this item cell
+    */
+   int getHeight();
+
+   /**
+    * @return the original document text for this item cell, or a fill code such as {@link UmlsItemCell#ENTITY_FILL}
+    */
+   String getText();
+
+   /**
+    * @param lineIndex index of the line required to write this item cell, 0 being the first line of the cell
+    * @return text to be written on the given line to represent this item cell
+    */
+   String getLineText( int lineIndex );
+}

Added: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/cell/UmlsItemCell.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/cell/UmlsItemCell.java?rev=1689929&view=auto
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/cell/UmlsItemCell.java (added)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/cell/UmlsItemCell.java Wed Jul  8 18:16:46 2015
@@ -0,0 +1,17 @@
+package org.apache.ctakes.core.cc.pretty.cell;
+
+
+/**
+ * Item Cell for an umls entity
+ */
+public interface UmlsItemCell extends ItemCell {
+
+   // Return code, not printable text: indicates that the full entity span should be filled with an indicator character, e.g. '='
+   static public final String ENTITY_FILL = "ENTITY_FILL";
+
+   /**
+    * @return true if the umls entity represented by this item cell is negated
+    */
+   boolean isNegated();
+
+}

Added: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/plaintext/PrettyTextWriter.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/plaintext/PrettyTextWriter.java?rev=1689929&view=auto
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/plaintext/PrettyTextWriter.java (added)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/plaintext/PrettyTextWriter.java Wed Jul  8 18:16:46 2015
@@ -0,0 +1,357 @@
+package org.apache.ctakes.core.cc.pretty.plaintext;
+
+import org.apache.ctakes.core.cc.pretty.SemanticGroup;
+import org.apache.ctakes.core.cc.pretty.cell.DefaultBaseItemCell;
+import org.apache.ctakes.core.cc.pretty.cell.DefaultUmlsItemCell;
+import org.apache.ctakes.core.cc.pretty.cell.ItemCell;
+import org.apache.ctakes.core.cc.pretty.row.DefaultItemRow;
+import org.apache.ctakes.core.cc.pretty.row.ItemRow;
+import org.apache.ctakes.core.cc.pretty.textspan.DefaultTextSpan;
+import org.apache.ctakes.core.cc.pretty.textspan.TextSpan;
+import org.apache.ctakes.core.util.DocumentIDAnnotationUtil;
+import org.apache.ctakes.core.util.IdentifiedAnnotationUtil;
+import org.apache.ctakes.typesystem.type.refsem.UmlsConcept;
+import org.apache.ctakes.typesystem.type.syntax.BaseToken;
+import org.apache.ctakes.typesystem.type.syntax.NewlineToken;
+import org.apache.ctakes.typesystem.type.syntax.WordToken;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.log4j.Logger;
+import org.apache.uima.cas.text.AnnotationFS;
+import org.apache.uima.fit.util.JCasUtil;
+import org.apache.uima.jcas.JCas;
+
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.util.*;
+
+
+/**
+ * Writes Document text, pos, semantic types and cuis to file.  Each Sentence starts a new series of pretty text lines.
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 6/24/2015
+ */
+final public class PrettyTextWriter {
+
+   static final String PARAM_OUTPUTDIR = "OutputDirectory";
+
+   static private final Logger LOGGER = Logger.getLogger( "PrettyTextWriter" );
+   static private final String FILE_EXTENSION = ".pretty.txt";
+
+   private String _outputDirPath;
+
+   /**
+    * @param outputDirectoryPath may be empty or null, in which case the current working directory is used
+    * @throws IllegalArgumentException if the path is not a directory after the attempt to create it, e.g. it is a File
+    * @throws SecurityException        if a security manager denies the java.io.File checks or the directory creation
+    */
+   public void setOutputDirectory( final String outputDirectoryPath ) throws IllegalArgumentException,
+                                                                             SecurityException {
+      // If no outputDir is specified (null or empty) the current working directory will be used.  Else check path.
+      if ( outputDirectoryPath == null || outputDirectoryPath.isEmpty() ) {
+         _outputDirPath = ""; // process() builds a relative file path when this is empty
+         LOGGER.debug( "No Output Directory Path specified, using current working directory "
+                       + System.getProperty( "user.dir" ) );
+         return;
+      }
+      final File outputDir = new File( outputDirectoryPath );
+      if ( !outputDir.exists() ) {
+         outputDir.mkdirs(); // result is not checked here; a failed creation is caught by the isDirectory test below
+      }
+      if ( !outputDir.isDirectory() ) {
+         throw new IllegalArgumentException( outputDirectoryPath + " is not a valid directory path" );
+      }
+      _outputDirPath = outputDirectoryPath;
+      LOGGER.debug( "Output Directory Path set to " + _outputDirPath );
+   }
+
+   /**
+    * Process the jcas and write pretty sentences to file.  Filename is based upon the document id stored in the cas
+    * and ends with ".pretty.txt".  An IOException while writing is logged and not rethrown.
+    * @param jcas cas from which the document id and the sentences to write are taken
+    */
+   public void process( final JCas jcas ) {
+      LOGGER.info( "Starting processing" );
+      final String docId = DocumentIDAnnotationUtil.getDocumentIdForFile( jcas );
+      File outputFile;
+      if ( _outputDirPath == null || _outputDirPath.isEmpty() ) {
+         outputFile = new File( docId + FILE_EXTENSION ); // relative path, resolved against the working directory
+      } else {
+         outputFile = new File( _outputDirPath, docId + FILE_EXTENSION );
+      }
+      try ( final BufferedWriter writer = new BufferedWriter( new FileWriter( outputFile ) ) ) {
+         final Collection<Sentence> sentences = JCasUtil.select( jcas, Sentence.class );
+         for ( Sentence sentence : sentences ) {
+            writeSentence( jcas, sentence, writer );
+         }
+      } catch ( IOException ioE ) {
+         // pass the exception itself so that the stack trace is logged, not only the message
+         LOGGER.error( "Could not write pretty file " + outputFile.getPath(), ioE );
+      }
+      LOGGER.info( "Finished processing" );
+   }
+
+   /**
+    * Write a sentence from the document text: a base row of text and pos, then rows of covering umls item cells
+    *
+    * @param jcas     cas passed on to the methods that create the base and covering item cells
+    * @param sentence annotation containing the sentence
+    * @param writer   writer to which pretty text for the sentence should be written
+    * @throws IOException if the writer has issues
+    */
+   static private void writeSentence( final JCas jcas,
+                                      final AnnotationFS sentence,
+                                      final BufferedWriter writer ) throws IOException {
+      // Create the base row
+      final Map<TextSpan, ItemCell> baseItemMap = createBaseItemMap( jcas, sentence );
+      // Create covering annotations (item cells that cover more than one base cell)
+      final Map<Integer, Collection<ItemCell>> coveringItemMap
+            = createCoveringItemMap( jcas, sentence, baseItemMap );
+      // Create annotation rows with shorter spans on top
+      final Collection<ItemRow> itemRows = new ArrayList<>();
+      final ItemRow baseItemRow = new DefaultItemRow();
+      for ( ItemCell itemCell : baseItemMap.values() ) {
+         baseItemRow.addItemCell( itemCell );
+      }
+      itemRows.add( baseItemRow ); // the base row is always the first row
+      itemRows.addAll( createItemRows( coveringItemMap ) );
+      // Create list of all text span offsets; only the begin and end of base cells are collected
+      final Collection<Integer> offsets = new HashSet<>();
+      for ( TextSpan textSpan : baseItemMap.keySet() ) {
+         offsets.add( textSpan.getBegin() );
+         offsets.add( textSpan.getEnd() );
+      }
+      // Create map of all text span offsets to adjusted offsets
+      final Map<Integer, Integer> offsetAdjustedMap = createOffsetAdjustedMap( offsets, itemRows );
+      // print all of the item rows
+      printItemRows( offsetAdjustedMap, itemRows, writer );
+   }
+
+   /**
+    * Creates one base item cell (text and pos) for each base token covered by the sentence.
+    * @param jcas     cas from which the base tokens covered by the sentence are selected
+    * @param sentence annotation containing the sentence
+    * @return map of text spans and item cells that represent those spans
+    */
+   static private Map<TextSpan, ItemCell> createBaseItemMap( final JCas jcas, final AnnotationFS sentence ) {
+      final int sentenceBegin = sentence.getBegin();
+      final Collection<BaseToken> baseTokens = JCasUtil.selectCovered( jcas, BaseToken.class, sentence );
+      final Map<TextSpan, ItemCell> baseItemMap = new HashMap<>();
+      for ( BaseToken baseToken : baseTokens ) {
+         final TextSpan textSpan = new DefaultTextSpan( baseToken, sentenceBegin );
+         if ( textSpan.getWidth() == 0 ) {
+            continue; // nothing to print for a zero-width span
+         }
+         if ( baseToken instanceof NewlineToken ) {
+            final ItemCell itemCell = new DefaultBaseItemCell( textSpan, " ", "" ); // a single space, no pos
+            baseItemMap.put( textSpan, itemCell );
+            continue;
+         }
+         final String tokenText = baseToken.getCoveredText();
+         final String tokenPos = getTokenPos( baseToken );
+         final ItemCell itemCell = new DefaultBaseItemCell( textSpan, tokenText, tokenPos );
+         baseItemMap.put( textSpan, itemCell );
+      }
+      return baseItemMap;
+   }
+
+   /**
+    * @param jcas        cas from which the identified annotations covered by the sentence are selected
+    * @param sentence    annotation containing the sentence
+    * @param baseItemMap map of text spans and item cells that represent those spans
+    * @return map of covering umls item cells, key = number of base cells that the cell's span overlaps
+    */
+   static private Map<Integer, Collection<ItemCell>> createCoveringItemMap( final JCas jcas,
+                                                                            final AnnotationFS sentence,
+                                                                            final Map<TextSpan, ItemCell> baseItemMap ) {
+      final int sentenceBegin = sentence.getBegin();
+      final Collection<IdentifiedAnnotation> identifiedAnnotations
+            = JCasUtil.selectCovered( jcas, IdentifiedAnnotation.class, sentence );
+      final Map<Integer, Collection<ItemCell>> coveringAnnotationMap = new HashMap<>();
+      for ( IdentifiedAnnotation identifiedAnnotation : identifiedAnnotations ) {
+         final Map<String, Collection<String>> semanticCuis = getSemanticCuis( identifiedAnnotation );
+         if ( semanticCuis.isEmpty() ) {
+            continue; // annotations without semantic cuis get no cell
+         }
+         final TextSpan textSpan = new DefaultTextSpan( identifiedAnnotation, sentenceBegin );
+         if ( textSpan.getWidth() == 0 ) {
+            continue; // nothing to print for a zero-width span
+         }
+         final ItemCell itemCell = new DefaultUmlsItemCell( textSpan, identifiedAnnotation
+               .getPolarity(), semanticCuis );
+         final Collection<ItemCell> coveredBaseItems = getCoveredBaseItems( textSpan, baseItemMap );
+         Collection<ItemCell> coveringAnnotations
+               = coveringAnnotationMap.get( coveredBaseItems.size() ); // only the count of overlapped cells is used
+         if ( coveringAnnotations == null ) {
+            coveringAnnotations = new HashSet<>();
+            coveringAnnotationMap.put( coveredBaseItems.size(), coveringAnnotations );
+         }
+         coveringAnnotations.add( itemCell );
+      }
+      return coveringAnnotationMap;
+   }
+
+   /**
+    * @param offsets  original document offsets; NOTE(review): a cell begin or end that is missing here makes the lookups below return null - confirm that covering spans always align with base spans
+    * @param itemRows item rows whose cells may need more width than their original text span provides
+    * @return map of original document offsets to adjusted printable offsets
+    */
+   static private Map<Integer, Integer> createOffsetAdjustedMap( final Collection<Integer> offsets,
+                                                                 final Iterable<ItemRow> itemRows ) {
+      // Create map of all text span offsets to adjusted offsets, starting with each offset mapped to itself
+      final List<Integer> offsetList = new ArrayList<>( offsets );
+      Collections.sort( offsetList );
+      final Map<Integer, Integer> offsetAdjustedMap = new HashMap<>( offsetList.size() );
+      for ( Integer offset : offsetList ) {
+         offsetAdjustedMap.put( offset, offset );
+      }
+      for ( ItemRow itemRow : itemRows ) {
+         final Collection<ItemCell> rowItemCells = itemRow.getItemCells();
+         for ( ItemCell itemCell : rowItemCells ) {
+            final TextSpan textSpan = itemCell.getTextSpan();
+            final int needWidth = itemCell.getWidth();
+            final int nowWidth = offsetAdjustedMap.get( textSpan.getEnd() ) -
+                                 offsetAdjustedMap.get( textSpan.getBegin() ); // width after the adjustments so far
+            if ( needWidth > nowWidth ) {
+               final int delta = needWidth - nowWidth;
+               for ( Integer originalOffset : offsetList ) {
+                  if ( originalOffset >= textSpan.getEnd() ) { // shift the end of this cell and everything after it
+                     final Integer oldAdjustedOffset = offsetAdjustedMap.get( originalOffset );
+                     offsetAdjustedMap.put( originalOffset, oldAdjustedOffset + delta );
+                  }
+               }
+            }
+         }
+      }
+      return offsetAdjustedMap;
+   }
+
+   /**
+    * @param offsetAdjustedMap map of original document offsets to adjusted printable offsets
+    * @param itemRows          item rows, written in iteration order, each row line by line
+    * @param writer            writer to which pretty text for the sentence should be written
+    * @throws IOException if the writer has issues
+    */
+   static private void printItemRows( final Map<Integer, Integer> offsetAdjustedMap,
+                                      final Iterable<ItemRow> itemRows,
+                                      final BufferedWriter writer ) throws IOException {
+      int rowWidth = 0; // becomes the largest adjusted offset
+      for ( int adjustedOffset : offsetAdjustedMap.values() ) {
+         rowWidth = Math.max( rowWidth, adjustedOffset );
+      }
+      // Write Sentence Rows; only the first written line gets the TEXT: label, later lines are indented to match
+      boolean firstLine = true;
+      for ( ItemRow itemRow : itemRows ) {
+         final int rowHeight = itemRow.getHeight();
+         for ( int i = 0; i < rowHeight; i++ ) {
+            final String lineText = itemRow.getTextLine( i, rowWidth, offsetAdjustedMap );
+            if ( !lineText.isEmpty() ) { // empty lines are not written
+               if ( firstLine ) {
+                  writer.write( "TEXT:  " + lineText );
+                  firstLine = false;
+               } else {
+                  writer.write( "       " + lineText );
+
+               }
+               writer.newLine();
+            }
+         }
+      }
+      writer.newLine(); // blank line after the rows of the sentence
+   }
+
+
+   /**
+    * @param textSpan    text span of interest
+    * @param baseItemMap map of text spans and item cells that represent those spans
+    * @return item cells of all base items whose text span overlaps the given text span
+    */
+   static private Collection<ItemCell> getCoveredBaseItems( final TextSpan textSpan,
+                                                            final Map<TextSpan, ItemCell> baseItemMap ) {
+      final Collection<ItemCell> overlappingCells = new ArrayList<>();
+      for ( Map.Entry<TextSpan, ItemCell> entry : baseItemMap.entrySet() ) {
+         final TextSpan baseSpan = entry.getKey();
+         if ( !baseSpan.overlaps( textSpan ) ) {
+            // base item lies entirely outside the span of interest
+            continue;
+         }
+         overlappingCells.add( entry.getValue() );
+      }
+      return overlappingCells;
+   }
+
+
+   /**
+    * Create annotation rows with shorter spans on top
+    *
+    * @param coveringItemMap map of all item cells for the sentence,
+    *                        key = number of tokens covered, value = item cells
+    * @return list of item rows, each containing non-overlapping item cells
+    */
+   static private Collection<ItemRow> createItemRows( final Map<Integer, Collection<ItemCell>> coveringItemMap ) {
+      // Visit the cells in ascending order of covered-token count so shorter spans land in earlier rows
+      final List<Integer> ascendingCounts = new ArrayList<>( coveringItemMap.keySet() );
+      Collections.sort( ascendingCounts );
+      final Collection<ItemRow> rows = new ArrayList<>();
+      for ( Integer count : ascendingCounts ) {
+         for ( ItemCell cell : coveringItemMap.get( count ) ) {
+            // First existing row that accepts the cell wins
+            ItemRow host = null;
+            for ( ItemRow candidate : rows ) {
+               if ( candidate.addItemCell( cell ) ) {
+                  host = candidate;
+                  break;
+               }
+            }
+            if ( host != null ) {
+               continue;
+            }
+            // No existing row accepted the cell, so it starts a new row
+            final ItemRow freshRow = new DefaultItemRow();
+            freshRow.addItemCell( cell );
+            rows.add( freshRow );
+         }
+      }
+      return rows;
+   }
+
+   /**
+    * @param baseToken some token
+    * @return the part of speech text if the base token is a word token with a non-null part of speech, else ""
+    */
+   static private String getTokenPos( final BaseToken baseToken ) {
+      // Only -word- tokens are of interest; every other kind of token yields no text
+      if ( baseToken instanceof WordToken ) {
+         final String partOfSpeech = baseToken.getPartOfSpeech();
+         // a word token without a part of speech also yields no text
+         return partOfSpeech == null ? "" : partOfSpeech;
+      }
+      return "";
+   }
+
+
+   /**
+    * @param identifiedAnnotation an annotation of interest
+    * @return map of semantic type names and cuis within those types as they apply to the annotation
+    */
+   static private Map<String, Collection<String>> getSemanticCuis( final IdentifiedAnnotation identifiedAnnotation ) {
+      final Collection<UmlsConcept> concepts = IdentifiedAnnotationUtil.getUmlsConcepts( identifiedAnnotation );
+      final boolean hasConcepts = concepts != null && !concepts.isEmpty();
+      if ( !hasConcepts ) {
+         return Collections.emptyMap();
+      }
+      // key = semantic type name, value = distinct cuis seen for that semantic type
+      final Map<String, Collection<String>> cuisBySemanticName = new HashMap<>();
+      for ( UmlsConcept concept : concepts ) {
+         final String cui = concept.getCui();
+         final String semanticName = SemanticGroup.getSemanticName( concept.getTui() );
+         Collection<String> cuisForName = cuisBySemanticName.get( semanticName );
+         if ( cuisForName == null ) {
+            // first concept seen for this semantic type
+            cuisForName = new HashSet<>();
+            cuisBySemanticName.put( semanticName, cuisForName );
+         }
+         cuisForName.add( cui );
+      }
+      return cuisBySemanticName;
+   }
+
+}

Added: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/plaintext/PrettyTextWriterFit.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/plaintext/PrettyTextWriterFit.java?rev=1689929&view=auto
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/plaintext/PrettyTextWriterFit.java (added)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/plaintext/PrettyTextWriterFit.java Wed Jul  8 18:16:46 2015
@@ -0,0 +1,103 @@
+package org.apache.ctakes.core.cc.pretty.plaintext;
+
+//import org.apache.log4j.Logger;
+
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_engine.AnalysisEngineDescription;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.CASException;
+import org.apache.uima.fit.component.CasConsumer_ImplBase;
+import org.apache.uima.fit.descriptor.ConfigurationParameter;
+import org.apache.uima.fit.factory.AnalysisEngineFactory;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
+
+
+/**
+ * Writes Document text, pos, semantic types and cuis.  Each Sentence starts a new series of pretty text lines.
+ * This version can be used in the UimaFit style with {@link org.apache.uima.fit.descriptor.ConfigurationParameter}
+ * It cannot be used in the old Uima CPE style (e.g. the Uima CPE Gui) as the Uima CPE has problems with Fit Consumers.
+ * There is a version that can be used with the CPE GUI:
+ * {@link org.apache.ctakes.core.cc.pretty.plaintext.PrettyTextWriterUima}
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @see org.apache.ctakes.core.cc.pretty.plaintext.PrettyTextWriter
+ * @since 7/8/2015
+ */
+public class PrettyTextWriterFit extends CasConsumer_ImplBase {
+
+   // The annotated field below receives the configured output directory.
+   // Original author's note: UimaFit appeared to set the first instance variable to the parameter value,
+   // desired or otherwise - so this field is deliberately kept as the first instance variable.
+   static private final String PARAM_OUTPUTDIR = PrettyTextWriter.PARAM_OUTPUTDIR;
+   @ConfigurationParameter(
+         name = PARAM_OUTPUTDIR,
+         mandatory = false,
+         description = "Directory to which files should be saved",
+         defaultValue = ""
+   )
+   private String fitOutputDirectoryPath;
+
+//   static private final Logger LOGGER = Logger.getLogger( "PrettyTextWriterFit" );
+
+   // All real work is handed to this delegate
+   final private PrettyTextWriter _prettyTextWriter = new PrettyTextWriter();
+
+   public PrettyTextWriterFit() {
+      super();
+   }
+
+   /**
+    * {@inheritDoc}
+    * Hands the output directory to the delegate writer, preferring the annotated field
+    * and falling back to the value held by the uima context when the field is null.
+    */
+   @Override
+   public void initialize( final UimaContext uimaContext ) throws ResourceInitializationException {
+      super.initialize( uimaContext );
+      try {
+         final String directoryPath = fitOutputDirectoryPath != null
+                                      ? fitOutputDirectoryPath
+                                      : (String)uimaContext.getConfigParameterValue( PARAM_OUTPUTDIR );
+         _prettyTextWriter.setOutputDirectory( directoryPath );
+      } catch ( IllegalArgumentException | SecurityException multE ) {
+         // thrown if the path specifies a File (not Dir) or by file system access methods
+         throw new ResourceInitializationException( multE );
+      }
+   }
+
+
+   /**
+    * {@inheritDoc}
+    * Obtains the JCas view of the given CAS and passes it to the delegate writer.
+    */
+   @Override
+   public void process( final CAS aCAS ) throws AnalysisEngineProcessException {
+      final JCas jcas;
+      try {
+         jcas = aCAS.getJCas();
+      } catch ( CASException casE ) {
+         throw new AnalysisEngineProcessException( casE );
+      }
+      _prettyTextWriter.process( jcas );
+   }
+
+   /**
+    * Same as {@link #createAnnotatorDescription(String)} with an empty output directory path.
+    *
+    * @return This Cas Consumer as an Analysis Engine
+    * @throws org.apache.uima.resource.ResourceInitializationException if anything went wrong
+    */
+   static public AnalysisEngineDescription createAnnotatorDescription() throws ResourceInitializationException {
+      return createAnnotatorDescription( "" );
+   }
+
+   /**
+    * @param outputDirectoryPath may be empty or null, in which case the current working directory is used
+    * @return This Cas Consumer as an Analysis Engine
+    * @throws org.apache.uima.resource.ResourceInitializationException if anything went wrong
+    */
+   static public AnalysisEngineDescription createAnnotatorDescription( final String outputDirectoryPath )
+         throws ResourceInitializationException {
+      return AnalysisEngineFactory.createEngineDescription( PrettyTextWriterFit.class,
+            PrettyTextWriterFit.PARAM_OUTPUTDIR, outputDirectoryPath );
+   }
+
+}

Added: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/plaintext/PrettyTextWriterUima.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/plaintext/PrettyTextWriterUima.java?rev=1689929&view=auto
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/plaintext/PrettyTextWriterUima.java (added)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/plaintext/PrettyTextWriterUima.java Wed Jul  8 18:16:46 2015
@@ -0,0 +1,64 @@
+package org.apache.ctakes.core.cc.pretty.plaintext;
+
+import org.apache.log4j.Logger;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.CASException;
+import org.apache.uima.collection.CasConsumer_ImplBase;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
+
+
+/**
+ * Writes Document text, pos, semantic types and cuis.  Each Sentence starts a new series of pretty text lines.
+ * This can be used with the old descriptor .xml files and the UIMA CPE Gui.  For a UimaFit PrettyTextWriter, use
+ * {@link org.apache.ctakes.core.cc.pretty.plaintext.PrettyTextWriterFit}
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @see org.apache.ctakes.core.cc.pretty.plaintext.PrettyTextWriter
+ * @since 6/24/2015
+ */
+final public class PrettyTextWriterUima extends CasConsumer_ImplBase {
+
+   static private final String PARAM_OUTPUTDIR = PrettyTextWriter.PARAM_OUTPUTDIR;
+
+   static private final Logger LOGGER = Logger.getLogger( "PrettyTextWriterUima" );
+
+   // All real work is handed to this delegate
+   final private PrettyTextWriter _prettyTextWriter = new PrettyTextWriter();
+
+   public PrettyTextWriterUima() {
+      super();
+   }
+
+   /**
+    * {@inheritDoc}
+    * Reads the output directory configuration parameter and hands it to the delegate writer.
+    */
+   @Override
+   public void initialize() throws ResourceInitializationException {
+      super.initialize();
+      try {
+         final String directoryPath = (String)getConfigParameterValue( PARAM_OUTPUTDIR );
+         _prettyTextWriter.setOutputDirectory( directoryPath );
+      } catch ( IllegalArgumentException | SecurityException multE ) {
+         // thrown if the path specifies a File (not Dir) or by file system access methods
+         throw new ResourceInitializationException( multE );
+      }
+   }
+
+   /**
+    * {@inheritDoc}
+    * Obtains the JCas view of the given CAS and passes it to the delegate writer.
+    */
+   @Override
+   public void processCas( final CAS aCAS ) throws AnalysisEngineProcessException {
+      final JCas jcas;
+      try {
+         jcas = aCAS.getJCas();
+      } catch ( CASException casE ) {
+         throw new AnalysisEngineProcessException( casE );
+      }
+      _prettyTextWriter.process( jcas );
+   }
+
+}

Added: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/row/DefaultItemRow.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/row/DefaultItemRow.java?rev=1689929&view=auto
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/row/DefaultItemRow.java (added)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/row/DefaultItemRow.java Wed Jul  8 18:16:46 2015
@@ -0,0 +1,99 @@
+package org.apache.ctakes.core.cc.pretty.row;
+
+import org.apache.ctakes.core.cc.pretty.cell.ItemCell;
+import org.apache.ctakes.core.cc.pretty.cell.UmlsItemCell;
+import org.apache.ctakes.core.cc.pretty.textspan.TextSpan;
+
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.Map;
+
+/**
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 7/6/2015
+ */
+public final class DefaultItemRow implements ItemRow {
+
+   static private final char END_CHAR = '|';
+   static private final char FILL_CHAR = '=';
+
+   final private Collection<ItemCell> _itemCells = new HashSet<>();
+   private int _rowHeight = 0;
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public boolean addItemCell( final ItemCell itemCell ) {
+      final TextSpan textSpan = itemCell.getTextSpan();
+      for ( ItemCell extantItemCell : _itemCells ) {
+         if ( extantItemCell.getTextSpan().overlaps( textSpan ) ) {
+            return false;
+         }
+      }
+      _itemCells.add( itemCell );
+      _rowHeight = Math.max( _rowHeight, itemCell.getHeight() );
+      return true;
+   }
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public int getHeight() {
+      return _rowHeight;
+   }
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public Collection<ItemCell> getItemCells() {
+      return _itemCells;
+   }
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public String getTextLine( final int lineIndex, final int rowWidth, final Map<Integer, Integer> offsetAdjustedMap ) {
+      final char[] chars = new char[ rowWidth ];
+      Arrays.fill( chars, ' ' );
+      final StringBuilder sb = new StringBuilder( rowWidth );
+      sb.append( chars );
+      for ( ItemCell itemCell : _itemCells ) {
+         final int begin = offsetAdjustedMap.get( itemCell.getTextSpan().getBegin() );
+         final int end = offsetAdjustedMap.get( itemCell.getTextSpan().getEnd() );
+         final int width = end - begin;
+         final String annotationText = itemCell.getLineText( lineIndex );
+         if ( annotationText.equals( UmlsItemCell.ENTITY_FILL ) ) {
+            final char[] fill_chars = new char[ width ];
+            Arrays.fill( fill_chars, FILL_CHAR );
+            fill_chars[ 0 ] = END_CHAR;
+            fill_chars[ fill_chars.length - 1 ] = END_CHAR;
+            sb.replace( begin, begin + width, new String( fill_chars ) );
+         } else {
+            final int paddedOffset = getPaddedOffset( annotationText, width );
+            sb.replace( begin + paddedOffset, begin + paddedOffset + annotationText.length(), annotationText );
+         }
+      }
+      return sb.toString();
+   }
+
+
+   /**
+    * @param text  to be printed
+    * @param width of cell
+    * @return required padding in characters to center text in cell
+    */
+   static private int getPaddedOffset( final CharSequence text, final int width ) {
+      final int textWidth = text.length();
+      if ( textWidth == width ) {
+         return 0;
+      }
+      return (width - textWidth) / 2;
+   }
+
+}

Added: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/row/ItemRow.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/row/ItemRow.java?rev=1689929&view=auto
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/row/ItemRow.java (added)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/row/ItemRow.java Wed Jul  8 18:16:46 2015
@@ -0,0 +1,39 @@
+package org.apache.ctakes.core.cc.pretty.row;
+
+import org.apache.ctakes.core.cc.pretty.cell.ItemCell;
+
+import java.util.Collection;
+import java.util.Map;
+
+/**
+ * A row of item cells that represent part of a sentence and its annotations
+ */
+public interface ItemRow {
+
+   /**
+    * Attempt to add an item cell to this row
+    *
+    * @param itemCell item cell to place in this row
+    * @return true if the item cell was added to this row
+    */
+   boolean addItemCell( ItemCell itemCell );
+
+   /**
+    * @return height in lines required to accommodate this item row
+    */
+   int getHeight();
+
+   /**
+    * @return item cells in this row
+    */
+   Collection<ItemCell> getItemCells();
+
+   /**
+    * @param lineIndex         index of the line within this item row that should be written
+    * @param rowWidth          width in characters of the row
+    * @param offsetAdjustedMap map of original document offsets to adjusted printable offsets
+    * @return text to be written on the given line to represent this item row
+    */
+   String getTextLine( int lineIndex, int rowWidth, Map<Integer, Integer> offsetAdjustedMap );
+
+}

Added: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/textspan/DefaultTextSpan.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/textspan/DefaultTextSpan.java?rev=1689929&view=auto
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/textspan/DefaultTextSpan.java (added)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/textspan/DefaultTextSpan.java Wed Jul  8 18:16:46 2015
@@ -0,0 +1,82 @@
+package org.apache.ctakes.core.cc.pretty.textspan;
+
+import org.apache.uima.cas.text.AnnotationFS;
+
+/**
+ * Holder for begin and end text span offsets within a containing sentence
+ */
+public final class DefaultTextSpan implements TextSpan {
+
+   final private int _begin;
+   final private int _end;
+
+   /**
+    * Creates a span from an annotation, shifting its offsets so they are relative to the sentence
+    *
+    * @param annotation     annotation whose begin and end offsets are used
+    * @param sentenceOffset begin span offset of the containing sentence
+    */
+   public DefaultTextSpan( final AnnotationFS annotation, final int sentenceOffset ) {
+      this( annotation.getBegin() - sentenceOffset, annotation.getEnd() - sentenceOffset );
+   }
+
+   /**
+    * @param begin begin offset within the containing sentence
+    * @param end   end offset within the containing sentence
+    */
+   public DefaultTextSpan( final int begin, final int end ) {
+      _begin = begin;
+      _end = end;
+   }
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public int getBegin() {
+      return _begin;
+   }
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public int getEnd() {
+      return _end;
+   }
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public int getWidth() {
+      return _end - _begin;
+   }
+
+   /**
+    * NOTE: TextSpans are begin inclusive end exclusive.
+    * So, the last index of each span (end - 1) is what gets compared to the begin of the other span
+    *
+    * @param textSpan another textspan
+    * @return true if there is overlap between the two text spans
+    */
+   @Override
+   public boolean overlaps( final TextSpan textSpan ) {
+      final int otherLastIndex = textSpan.getEnd() - 1;
+      if ( otherLastIndex < _begin ) {
+         // the other span finishes before this one starts
+         return false;
+      }
+      final int thisLastIndex = _end - 1;
+      // overlap unless the other span starts after this one finishes
+      return textSpan.getBegin() <= thisLastIndex;
+   }
+
+   /**
+    * {@inheritDoc}
+    * Any TextSpan with the same begin and end offsets is considered equal.
+    */
+   @Override
+   public boolean equals( final Object other ) {
+      if ( !(other instanceof TextSpan) ) {
+         return false;
+      }
+      final TextSpan otherSpan = (TextSpan)other;
+      return otherSpan.getBegin() == _begin && otherSpan.getEnd() == _end;
+   }
+
+   /**
+    * {@inheritDoc}
+    * Derived only from the begin and end offsets, matching {@link #equals(Object)}.
+    */
+   @Override
+   public int hashCode() {
+      return _begin + 1000 * _end;
+   }
+
+}

Added: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/textspan/TextSpan.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/textspan/TextSpan.java?rev=1689929&view=auto
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/textspan/TextSpan.java (added)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/pretty/textspan/TextSpan.java Wed Jul  8 18:16:46 2015
@@ -0,0 +1,31 @@
+package org.apache.ctakes.core.cc.pretty.textspan;
+
+/**
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 7/6/2015
+ */
+public interface TextSpan {
+
+   /**
+    * @return begin offset
+    */
+   int getBegin();
+
+   /**
+    * @return end offset; DefaultTextSpan treats the end as exclusive
+    */
+   int getEnd();
+
+   /**
+    * @return width of the text span; DefaultTextSpan computes it as end minus begin
+    */
+   int getWidth();
+
+   /**
+    * @param textSpan another text span
+    * @return true if the text spans overlap
+    */
+   boolean overlaps( TextSpan textSpan );
+
+}



Mime
View raw message