ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From seanfi...@apache.org
Subject svn commit: r1650442 - in /ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core: cc/AbstractJdbcWriter.java cc/JdbcWriterTemplate.java util/IdentifiedAnnotationUtil.java util/SourceMetadataUtil.java
Date Fri, 09 Jan 2015 05:33:11 GMT
Author: seanfinan
Date: Fri Jan  9 05:33:10 2015
New Revision: 1650442

URL: http://svn.apache.org/r1650442
Log:
Simple Template code for jdbc writer cas consumer
Abstract jdbc writer cas consumer
Utility class with convenience methods for pulling code information from IdentifiedAnnotations
Utility class with convenience methods for pulling source information from the cas

Added:
    ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/AbstractJdbcWriter.java
    ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/JdbcWriterTemplate.java
    ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/IdentifiedAnnotationUtil.java
    ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/SourceMetadataUtil.java

Added: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/AbstractJdbcWriter.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/AbstractJdbcWriter.java?rev=1650442&view=auto
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/AbstractJdbcWriter.java
(added)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/AbstractJdbcWriter.java
Fri Jan  9 05:33:10 2015
@@ -0,0 +1,280 @@
+package org.apache.ctakes.core.cc;
+
+import org.apache.ctakes.core.resource.JdbcConnectionResource;
+import org.apache.ctakes.core.util.SourceMetadataUtil;
+import org.apache.ctakes.typesystem.type.structured.SourceData;
+import org.apache.log4j.Logger;
+import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.CASException;
+import org.apache.uima.collection.CasConsumer_ImplBase;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceAccessException;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.resource.ResourceProcessException;
+import org.apache.uima.util.ProcessTrace;
+
+import java.io.IOException;
+import java.sql.*;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Write cas to a database using jdbc
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 1/8/2015
+ */
+abstract public class AbstractJdbcWriter extends CasConsumer_ImplBase {
+
+   static private final Logger LOGGER = Logger.getLogger( "AbstractJdbcWriter" );
+
+   // Parameter names for the desc file
+   static public final String PARAM_DB_CONN_RESRC = "DbConnResrcName";
+
+   // Maximum row count for prepared statement batches
+   static private final int MAX_BATCH_SIZE = 100;
+
+
+   protected interface TableInfo {
+      String getTableName();
+
+      FieldInfo[] getFieldInfos();
+   }
+
+   protected interface FieldInfo {
+      String getFieldName();
+
+      int getFieldIndex();
+
+      Class<?> getValueClass();
+   }
+
+   static protected class TableSqlInfo {
+      final private PreparedStatement __preparedStatement;
+      private int __batchCount;
+
+      protected TableSqlInfo( final Connection connection, final TableInfo tableInfo ) throws
SQLException {
+         final String sql = createRowInsertSql( tableInfo.getTableName(), tableInfo.getFieldInfos()
);
+         __preparedStatement = connection.prepareStatement( sql );
+      }
+
+      protected PreparedStatement getPreparedStatement() {
+         return __preparedStatement;
+      }
+
+      protected void setBatchCount( final int batchCount ) {
+         __batchCount = batchCount;
+      }
+
+      protected int getBatchCount() {
+         return __batchCount;
+      }
+   }
+
+
+   final protected Map<String, TableSqlInfo> _tableSqlInfoMap = new HashMap<>();
+
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public void initialize() throws ResourceInitializationException {
+      final String resourceName = (String)getConfigParameterValue( PARAM_DB_CONN_RESRC );
+      JdbcConnectionResource resource;
+      try {
+         resource = (JdbcConnectionResource)getUimaContext().getResourceObject( resourceName
);
+      } catch ( ResourceAccessException raE ) {
+         // thrown by UimaContext.getResourceObject(..)
+         throw new ResourceInitializationException( raE );
+      }
+      final Connection connection = resource.getConnection();
+      final Collection<TableInfo> tableInfos = getTableInfos();
+      try {
+         for ( TableInfo tableInfo : tableInfos ) {
+            _tableSqlInfoMap.put( tableInfo.getTableName(), new TableSqlInfo( connection,
tableInfo ) );
+         }
+      } catch ( SQLException sqlE ) {
+         // thrown by Connection.prepareStatement(..)
+         throw new ResourceInitializationException( sqlE );
+      }
+   }
+
+   /**
+    * {@inheritDoc}
+    * <p>
+    * Closes every PreparedStatement held in the table sql info map, then delegates to the superclass.
+    * A failure to close one statement is logged and does not prevent the remaining statements
+    * from being closed.
+    *
+    * @param arg0 process trace, passed through to the superclass
+    * @throws ResourceProcessException if thrown by the superclass
+    * @throws IOException              if thrown by the superclass
+    */
+   @Override
+   public void collectionProcessComplete( ProcessTrace arg0 )
+         throws ResourceProcessException, IOException {
+      for ( TableSqlInfo tableSqlInfo : _tableSqlInfoMap.values() ) {
+         // try/catch per statement so that one failed close cannot leak the statements after it
+         try {
+            tableSqlInfo.getPreparedStatement().close();
+         } catch ( SQLException sqlE ) {
+            // pass the exception itself so that the stack trace and cause are logged
+            LOGGER.warn( "Could not close PreparedStatement: " + sqlE.getMessage(), sqlE );
+         }
+      }
+      super.collectionProcessComplete( arg0 );
+   }
+
+
+   /**
+    * {@inheritDoc}
+    * <p>
+    * Pulls the source metadata from the cas, hands the cas to {@link #writeJCasInformation}, then
+    * executes and clears the batch of every TableSqlInfo that reports a batch count above zero.
+    * If the cas has no SourceData an error is logged and nothing is written.
+    *
+    * @param aCAS cas to write
+    * @throws ResourceProcessException if the JCas cannot be obtained, the encounter id cannot be parsed,
+    *                                  or a jdbc call throws an SQLException
+    */
+   @Override
+   public void processCas( final CAS aCAS ) throws ResourceProcessException {
+      JCas jcas;
+      try {
+         jcas = aCAS.getJCas();
+      } catch ( CASException casE ) {
+         throw new ResourceProcessException( casE );
+      }
+      final SourceData sourceData = SourceMetadataUtil.getSourceData( jcas );
+      if ( sourceData == null ) {
+         LOGGER.error( "Missing source metadata for document." );
+         return;
+      }
+      final long patientNum = SourceMetadataUtil.getPatientNum( jcas );
+      final int encounterNum = SourceMetadataUtil.getEncounterNum( sourceData );
+      final String providerId = SourceMetadataUtil.getProviderId( sourceData );
+      final Timestamp startDate = SourceMetadataUtil.getStartDate( sourceData );
+      try {
+
+         writeJCasInformation( jcas, encounterNum, patientNum, providerId, startDate );
+
+         // flush whatever rows the implementation left in each batch
+         for ( TableSqlInfo tableSqlInfo : _tableSqlInfoMap.values() ) {
+            if ( tableSqlInfo.getBatchCount() > 0 ) {
+               tableSqlInfo.getPreparedStatement().executeBatch();
+               // Not all drivers automatically clear the batch.  This is considered by some to be
+               // a feature, by most a bug.
+               tableSqlInfo.getPreparedStatement().clearBatch();
+               tableSqlInfo.setBatchCount( 0 );
+            }
+         }
+      } catch ( SQLException sqlE ) {
+         // thrown by PreparedStatement methods
+         throw new ResourceProcessException( sqlE );
+      }
+   }
+
+
+   /**
+    * Called from initialize()
+    *
+    * @return Table Info Objects for all tables of interest
+    */
+   abstract protected Collection<TableInfo> getTableInfos();
+
+   /**
+    * The main "process" method, called from processCas
+    *
+    * @param jcas         -
+    * @param encounterNum -
+    * @param patientNum   -
+    * @param providerId   -
+    * @param startDate    -
+    * @throws SQLException if implementations throw SQLException
+    */
+   abstract protected void writeJCasInformation( final JCas jcas, final int encounterNum,
+                                                 final long patientNum, final String providerId,
+                                                 final Timestamp startDate ) throws SQLException;
+
+
+   /**
+    * @return the map of table name to table sql info objects
+    */
+   protected Map<String, TableSqlInfo> getTableSqlInfoMap() {
+      return _tableSqlInfoMap;
+   }
+
+   /**
+    * This is a safety method to set values of fieldInfoMaps instead of doing a direct .put
in the map.
+    * an IllegalArgumentException will be thrown if the given value is not the same class
type as what the given
+    * FieldInfo wants
+    *
+    * @param fieldInfoMap map in which to set the value
+    * @param fieldInfo    key
+    * @param value        value
+    */
+   static protected void setFieldInfoValue( final Map<FieldInfo, Object> fieldInfoMap,
+                                            final FieldInfo fieldInfo, final Object value
) {
+      final Class<?> valueClass = fieldInfo.getValueClass();
+      if ( !valueClass.isInstance( value ) ) {
+         throw new IllegalArgumentException( "Invalid Value for Field " + fieldInfo.getFieldName()
);
+      }
+      fieldInfoMap.put( fieldInfo, value );
+   }
+
+   /**
+    * Adds a new row of values to a batch in the prepared statement.  If the number of rows
hits a maximum size (100)
+    * then the batch is executed.
+    *
+    * @param preparedStatement -
+    * @param batchSize         the current batch row count in the prepared statement
+    * @param fieldInfoMap      for row value assignment
+    * @return new batchCount (incremented by one or reset to zero)
+    * @throws SQLException if a PreparedStatement call throws one or if there is a type,
value mismatch in fieldInfoMap
+    */
+   static protected int writeTableRow( final PreparedStatement preparedStatement, final int
batchSize,
+                                       final Map<? extends FieldInfo, Object> fieldInfoMap
) throws SQLException {
+      for ( Map.Entry<? extends FieldInfo, Object> fieldInfoEntry : fieldInfoMap.entrySet()
) {
+         final int fieldIndex = fieldInfoEntry.getKey().getFieldIndex();
+         final Class<?> valueClass = fieldInfoEntry.getKey().getValueClass();
+         final Object value = fieldInfoEntry.getValue();
+         if ( valueClass.isAssignableFrom( String.class ) && String.class.isInstance(
value ) ) {
+            preparedStatement.setString( fieldIndex, (String)value );
+         } else if ( valueClass.isAssignableFrom( Integer.class ) && Integer.class.isInstance(
value ) ) {
+            preparedStatement.setInt( fieldIndex, (Integer)value );
+         } else if ( valueClass.isAssignableFrom( Long.class ) && Long.class.isInstance(
value ) ) {
+            preparedStatement.setLong( fieldIndex, (Long)value );
+         } else if ( valueClass.isAssignableFrom( Float.class ) && Float.class.isInstance(
value ) ) {
+            preparedStatement.setFloat( fieldIndex, (Float)value );
+         } else if ( valueClass.isAssignableFrom( Double.class ) && Double.class.isInstance(
value ) ) {
+            preparedStatement.setDouble( fieldIndex, (Double)value );
+         } else if ( valueClass.isAssignableFrom( Boolean.class ) && Boolean.class.isInstance(
value ) ) {
+            preparedStatement.setBoolean( fieldIndex, (Boolean)value );
+         } else if ( valueClass.isAssignableFrom( Timestamp.class ) && Timestamp.class.isInstance(
value ) ) {
+            preparedStatement.setTimestamp( fieldIndex, (Timestamp)value );
+         } else {
+            throw new SQLDataException( "Invalid Value Class for Field " + fieldInfoEntry.getKey().getFieldName()
);
+         }
+      }
+      preparedStatement.addBatch();
+      if ( batchSize + 1 >= MAX_BATCH_SIZE ) {
+         preparedStatement.executeBatch();
+         // Not all drivers automatically clear the batch.  This is considered by some to
be a feature, by most a bug.
+         preparedStatement.clearBatch();
+         return 0;
+      }
+      return batchSize + 1;
+   }
+
+   /**
+    * Builds the sql text for a parameterized single-row insert, of the form
+    * {@code insert into TABLE (FIELD1,FIELD2) values (?,?)}
+    * Field names and placeholders appear in the order of the given fieldInfos.
+    *
+    * @param tableName  name of the table receiving the row
+    * @param fieldInfos fields of the row; at least one is required
+    * @return sql insert statement text with one ? placeholder per field
+    * @throws SQLDataException if no fieldInfos are given
+    */
+   static protected String createRowInsertSql( final String tableName,
+                                               final FieldInfo... fieldInfos ) throws SQLDataException {
+      if ( fieldInfos.length == 0 ) {
+         throw new SQLDataException( "Must set at least one Field to create an sql insert Statement" );
+      }
+      final StringBuilder fieldNames = new StringBuilder();
+      final StringBuilder placeholders = new StringBuilder();
+      // empty before the first field, a comma before every following field
+      String separator = "";
+      for ( FieldInfo fieldInfo : fieldInfos ) {
+         fieldNames.append( separator ).append( fieldInfo.getFieldName() );
+         placeholders.append( separator ).append( '?' );
+         separator = ",";
+      }
+      return "insert into " + tableName + " (" + fieldNames + ") values (" + placeholders + ")";
+   }
+
+
+}

Added: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/JdbcWriterTemplate.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/JdbcWriterTemplate.java?rev=1650442&view=auto
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/JdbcWriterTemplate.java
(added)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/cc/JdbcWriterTemplate.java
Fri Jan  9 05:33:10 2015
@@ -0,0 +1,296 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.core.cc;
+
+import org.apache.ctakes.core.util.IdentifiedAnnotationUtil;
+import org.apache.ctakes.typesystem.type.refsem.UmlsConcept;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.log4j.Logger;
+import org.apache.uima.cas.text.AnnotationIndex;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.apache.uima.resource.ResourceInitializationException;
+
+import java.sql.PreparedStatement;
+import java.sql.SQLException;
+import java.sql.Timestamp;
+import java.util.*;
+
+/**
+ * Template Cas Consumer to write a table to a sql database using jdbc
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 1/8/2015
+ */
+public class JdbcWriterTemplate extends AbstractJdbcWriter {
+
+   static private final Logger LOGGER = Logger.getLogger( "JdbcWriterTemplate" );
+
+   // Parameter names for the desc file
+   static public final String PARAM_VECTOR_TABLE = "VectorTable";
+
+   static private final String SPAN_START_LABEL = "START";
+   static private final String SPAN_END_LABEL = "END";
+
+   public enum I2b2FieldInfo implements AbstractJdbcWriter.FieldInfo {
+      ENCOUNTER_NUM( 1, "encounter_num", Integer.class ),
+      PATIENT_NUM( 2, "patient_num", Long.class ),
+      CONCEPT_CD( 3, "concept_cd", String.class ),
+      PROVIDER_ID( 4, "provider_id", String.class ),
+      START_DATE( 5, "start_date", Timestamp.class ),
+      MODIFIER_CD( 6, "modifier_cd", String.class ),
+      INSTANCE_NUM( 7, "instance_num", Long.class ),
+      VALTYPE_CD( 8, "valtype_cd", String.class ),
+      TVAL_CHAR( 9, "tval_char", String.class ),
+      //      END_DATE(10,"end_date",Timestamp.class),
+//      LOCATION_CD(11,"location_cd",String.class),
+//      CONFIDENCE_NUM(12,"confidence_num",Integer.class),
+//      OBSERVATION_BLOB(13,"observation_blob",String.class),
+      I2B2_OBERVATION_BLOB( 10, "observation_blob", String.class );
+      final private String __name;
+      final private int __index;
+      final private Class<?> __class;
+
+      I2b2FieldInfo( final int index, final String name, final Class<?> valueClass
) {
+         __name = name;
+         __index = index;
+         __class = valueClass;
+      }
+
+      @Override
+      public String getFieldName() {
+         return __name;
+      }
+
+      @Override
+      public int getFieldIndex() {
+         return __index;
+      }
+
+      @Override
+      public Class<?> getValueClass() {
+         return __class;
+      }
+   }
+
+
+   private String _tableName;
+
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public void initialize() throws ResourceInitializationException {
+      _tableName = (String)getConfigParameterValue( PARAM_VECTOR_TABLE );
+      super.initialize();
+   }
+
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   protected Collection<TableInfo> getTableInfos() {
+      final TableInfo tableInfo = new TableInfo() {
+         @Override
+         public String getTableName() {
+            return _tableName;
+         }
+
+         @Override
+         public FieldInfo[] getFieldInfos() {
+            return I2b2FieldInfo.values();
+         }
+      };
+      return Collections.singletonList( tableInfo );
+   }
+
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   protected void writeJCasInformation( final JCas jcas, final int encounterNum,
+                                        final long patientNum, final String providerId,
+                                        final Timestamp startDate ) throws SQLException {
+      saveEntities( jcas, encounterNum, patientNum, providerId, startDate );
+   }
+
+
+   private void saveEntities( final JCas jcas, final int encounterNum, final long patientNum,
final String providerId,
+                              final Timestamp startDate ) throws SQLException {
+      final AnnotationIndex<Annotation> identifiedsIndex = jcas.getAnnotationIndex(
IdentifiedAnnotation.type );
+      if ( identifiedsIndex == null || identifiedsIndex.size() == 0 ) {
+         return;
+      }
+      final Map<I2b2FieldInfo, Object> fieldInfoValues = new EnumMap<>( I2b2FieldInfo.class
);
+      fieldInfoValues.put( I2b2FieldInfo.ENCOUNTER_NUM, encounterNum );
+      fieldInfoValues.put( I2b2FieldInfo.PATIENT_NUM, patientNum );
+      fieldInfoValues.put( I2b2FieldInfo.PROVIDER_ID, providerId );
+      fieldInfoValues.put( I2b2FieldInfo.START_DATE, startDate );
+      fieldInfoValues.put( I2b2FieldInfo.MODIFIER_CD, "@" );
+      fieldInfoValues.put( I2b2FieldInfo.VALTYPE_CD, "T" );
+      final Map<I2b2Concept, Collection<IdentifiedAnnotation>> cuiAnnotationListMap
= new HashMap<>();
+      for ( Annotation annotation : identifiedsIndex ) {
+         if ( annotation instanceof IdentifiedAnnotation ) {
+            final Collection<UmlsConcept> umlsConcepts
+                  = IdentifiedAnnotationUtil.getUmlsConcepts( (IdentifiedAnnotation)annotation
);
+            final Collection<I2b2Concept> i2b2Concepts = createI2b2Concepts( umlsConcepts
);
+            for ( I2b2Concept i2b2Concept : i2b2Concepts ) {
+               Collection<IdentifiedAnnotation> annotationList = cuiAnnotationListMap.get(
i2b2Concept );
+               if ( annotationList == null ) {
+                  annotationList = new ArrayList<>();
+                  cuiAnnotationListMap.put( i2b2Concept, annotationList );
+               }
+               annotationList.add( (IdentifiedAnnotation)annotation );
+            }
+         }
+      }
+      for ( Map.Entry<I2b2Concept, Collection<IdentifiedAnnotation>> i2b2ConceptAnnotations
: cuiAnnotationListMap
+            .entrySet() ) {
+         saveI2b2Concept( fieldInfoValues, i2b2ConceptAnnotations.getKey(), i2b2ConceptAnnotations.getValue()
);
+      }
+   }
+
+   private void saveI2b2Concept( final Map<I2b2FieldInfo, Object> fieldInfoValues,
+                                 final I2b2Concept i2b2Concept,
+                                 final Iterable<IdentifiedAnnotation> annotations )
throws SQLException {
+      final String cui = i2b2Concept.getCui();
+      String preferredText = i2b2Concept.getPreferredText();
+      if ( preferredText == null ) {
+         preferredText = "";
+      }
+      // save Affirmed
+      saveAnnotations( fieldInfoValues, cui, preferredText, annotations, true );
+      // save negated
+      saveAnnotations( fieldInfoValues, cui, preferredText, annotations, false );
+   }
+
+   /**
+    * Writes one table row per annotation of the requested polarity for a single concept.
+    * Rows are added to the batch of the prepared statement for this writer's table, and the batch
+    * row count held by the TableSqlInfo is updated after every row so that processCas(..) can
+    * execute a partial batch.
+    *
+    * @param fieldInfoValues row values shared by all rows of the document; the concept code, tval,
+    *                        instance number and blob entries are overwritten here
+    * @param cui             concept unique identifier
+    * @param preferredText   preferred text for the concept
+    * @param annotations     all annotations having the concept, affirmed and negated
+    * @param saveAffirmed    true to write only non-negated annotations, false to write only negated
+    * @throws SQLException if thrown while adding a row to the batch
+    */
+   private void saveAnnotations( final Map<I2b2FieldInfo, Object> fieldInfoValues,
+                                 final String cui,
+                                 final String preferredText,
+                                 final Iterable<IdentifiedAnnotation> annotations,
+                                 final boolean saveAffirmed ) throws SQLException {
+      // INSTANCE_NUM is declared with Long.class, so the value must be boxed as a Long.
+      // writeTableRow(..) rejects an Integer for a Long field with an SQLDataException.
+      long instanceNum = 1;
+      final String conceptCode = (saveAffirmed ? "" : "-") + cui;
+      fieldInfoValues.put( I2b2FieldInfo.CONCEPT_CD, conceptCode );
+      final String tvalChar = preferredText + (saveAffirmed ? "" : " Negated");
+      fieldInfoValues.put( I2b2FieldInfo.TVAL_CHAR, tvalChar );
+      final TableSqlInfo tableSqlInfo = _tableSqlInfoMap.get( _tableName );
+      final PreparedStatement preparedStatement = tableSqlInfo.getPreparedStatement();
+      for ( IdentifiedAnnotation annotation : annotations ) {
+         final boolean isNegated = annotation.getPolarity() < 0;
+         if ( saveAffirmed == isNegated ) {
+            continue;
+         }
+         fieldInfoValues.put( I2b2FieldInfo.INSTANCE_NUM, instanceNum );
+         final String observationBlob = createBlob( annotation );
+         fieldInfoValues.put( I2b2FieldInfo.I2B2_OBERVATION_BLOB, observationBlob );
+         final int batchCount
+               = writeTableRow( preparedStatement, tableSqlInfo.getBatchCount(), fieldInfoValues );
+         // Store the count back: a local copy alone is lost when this method returns, and then
+         // processCas(..) never sees a non-zero count and never executes the last partial batch.
+         tableSqlInfo.setBatchCount( batchCount );
+         instanceNum++;
+      }
+   }
+
+
+   /**
+    * @param annotation -
+    * @return a blob with encoded text span and covered text of the annotation
+    */
+   static private String createBlob( final IdentifiedAnnotation annotation ) {
+      final StringBuilder sb = new StringBuilder();
+      sb.append( '<' ).append( SPAN_START_LABEL ).append( '>' );
+      sb.append( annotation.getBegin() ).append( "</" ).append( SPAN_START_LABEL ).append(
'>' );
+      sb.append( '<' ).append( SPAN_END_LABEL ).append( '>' );
+      sb.append( annotation.getEnd() ).append( "</" ).append( SPAN_END_LABEL ).append(
'>' );
+      sb.append( annotation.getCoveredText() );
+      return sb.toString();
+   }
+
+   /**
+    * I2b2 only cares about Cui & preferred text.
+    * A Cui may belong to multiple Tuis, making multiple UmlsConcept objects (one per tui).
+    * I2b2 does NOT want multiple rows of a single Cui just because it has multiple tuis.
+    *
+    * @param umlsConcepts -
+    * @return -
+    */
+   static private Collection<I2b2Concept> createI2b2Concepts( final Iterable<UmlsConcept>
umlsConcepts ) {
+      final Collection<I2b2Concept> i2b2Concepts = new HashSet<>();
+      for ( UmlsConcept umlsConcept : umlsConcepts ) {
+         // Because the hashcode for an I2b2Concept is created from Cui and PrefText, the
"new" I2b2Concept
+         // may not be unique.  No repeats will be stored
+         i2b2Concepts.add( new I2b2Concept( umlsConcept.getCui(), umlsConcept.getPreferredText()
) );
+      }
+      return i2b2Concepts;
+   }
+
+
+   /**
+    * A more useful representation of umls concept for our purposes - we don't want repeat cuis
+    * for multiple tuis.
+    * Two concepts are equal when their cui and stored preferred text are equal; the hash code is
+    * computed from the same two values.
+    */
+   static private class I2b2Concept {
+
+      static public final String PREFERRED_TEXT_UNKNOWN = "Unknown Preferred Teex";
+
+      final private String _cui;
+      final private String _preferredText;
+
+      // cached at construction; both fields are final
+      final private int _hashcode;
+
+      /**
+       * @param cui concept unique identifier; preferred text is set to PREFERRED_TEXT_UNKNOWN
+       */
+      private I2b2Concept( final String cui ) {
+         this( cui, PREFERRED_TEXT_UNKNOWN );
+      }
+
+      /**
+       * @param cui           concept unique identifier
+       * @param preferredText preferred text; null is replaced by PREFERRED_TEXT_UNKNOWN
+       */
+      private I2b2Concept( final String cui, final String preferredText ) {
+         _cui = cui;
+         _preferredText = preferredText != null ? preferredText : PREFERRED_TEXT_UNKNOWN;
+         // Hash the stored (null-replaced) text, the same value that equals(..) compares.
+         // Hashing the raw argument gave a null text and an explicit PREFERRED_TEXT_UNKNOWN text
+         // different hash codes even though equals(..) reports the two concepts as equal.
+         _hashcode = (_cui + "_" + _preferredText).hashCode();
+      }
+
+      /**
+       * @return the cui given at construction
+       */
+      public String getCui() {
+         return _cui;
+      }
+
+      /**
+       * @return the preferred text, never null
+       */
+      public String getPreferredText() {
+         return _preferredText;
+      }
+
+      /**
+       * {@inheritDoc}
+       */
+      @Override
+      public boolean equals( final Object value ) {
+         if ( !(value instanceof I2b2Concept) ) {
+            return false;
+         }
+         final I2b2Concept other = (I2b2Concept)value;
+         // Objects.equals so that a null cui compares instead of throwing
+         return Objects.equals( _cui, other._cui )
+                && _preferredText.equals( other._preferredText );
+      }
+
+      /**
+       * {@inheritDoc}
+       */
+      @Override
+      public int hashCode() {
+         return _hashcode;
+      }
+   }
+
+
+}

Added: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/IdentifiedAnnotationUtil.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/IdentifiedAnnotationUtil.java?rev=1650442&view=auto
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/IdentifiedAnnotationUtil.java
(added)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/IdentifiedAnnotationUtil.java
Fri Jan  9 05:33:10 2015
@@ -0,0 +1,120 @@
+package org.apache.ctakes.core.util;
+
+import org.apache.ctakes.typesystem.type.refsem.OntologyConcept;
+import org.apache.ctakes.typesystem.type.refsem.UmlsConcept;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.log4j.Logger;
+import org.apache.uima.cas.FeatureStructure;
+import org.apache.uima.jcas.cas.FSArray;
+
+import java.util.Collection;
+import java.util.HashSet;
+
+/**
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 1/8/2015
+ */
+final public class IdentifiedAnnotationUtil {
+
+   static private final Logger LOGGER = Logger.getLogger( "IdentifiedAnnotationUtil" );
+
+   static public final String CTAKES_SNOMED_CODING_SCHEME = "SNOMED";
+   static public final String CTAKES_RXNORM_CODING_SCHEME = "RXNORM";
+
+   static private final FeatureStructure[] EMPTY_FEATURE_ARRAY = new FeatureStructure[ 0
];
+
+   private IdentifiedAnnotationUtil() {
+   }
+
+
+   static private FeatureStructure[] getOntologyConcepts( final IdentifiedAnnotation annotation
) {
+      final FSArray ontologyConcepts = annotation.getOntologyConceptArr();
+      if ( ontologyConcepts == null ) {
+         return EMPTY_FEATURE_ARRAY;
+      }
+      return ontologyConcepts.toArray();
+   }
+
+   /**
+    * @param annotation -
+    * @return list of all Umls Concepts associated with the annotation
+    */
+   static public Collection<UmlsConcept> getUmlsConcepts( final IdentifiedAnnotation
annotation ) {
+      final FeatureStructure[] ontologyConcepts = getOntologyConcepts( annotation );
+      final Collection<UmlsConcept> umlsConcepts = new HashSet<>( ontologyConcepts.length
);
+      for ( FeatureStructure ontologyConcept : ontologyConcepts ) {
+         if ( ontologyConcept instanceof UmlsConcept ) {
+            umlsConcepts.add( (UmlsConcept)ontologyConcept );
+         }
+      }
+      return umlsConcepts;
+   }
+
+   /**
+    * @param annotation -
+    * @return list of all Umls cuis associated with the annotation
+    */
+   static public Collection<String> getUmlsCuis( final IdentifiedAnnotation annotation
) {
+      final FeatureStructure[] ontologyConcepts = getOntologyConcepts( annotation );
+      final Collection<String> cuis = new HashSet<>( ontologyConcepts.length
);
+      for ( FeatureStructure ontologyConcept : ontologyConcepts ) {
+         if ( ontologyConcept instanceof UmlsConcept ) {
+            final UmlsConcept umlsConcept = (UmlsConcept)ontologyConcept;
+            final String cui = umlsConcept.getCui();
+            cuis.add( cui );
+         }
+      }
+      return cuis;
+   }
+
+   /**
+    * @param annotation -
+    * @return all Snomed codes associated with the annotation, taken from ontology concepts that
+    * are not UmlsConcepts
+    */
+   static public Collection<String> getSnomedCodes( final IdentifiedAnnotation annotation ) {
+      return getCodes( annotation, CTAKES_SNOMED_CODING_SCHEME );
+   }
+
+   /**
+    * @param annotation -
+    * @return all rxNORM codes associated with the annotation, taken from ontology concepts that
+    * are not UmlsConcepts
+    */
+   static public Collection<String> getRxNormCodes( final IdentifiedAnnotation annotation ) {
+      return getCodes( annotation, CTAKES_RXNORM_CODING_SCHEME );
+   }
+
+   /**
+    * Collects the non-empty codes of the annotation's ontology concepts that use the wanted scheme.
+    * UmlsConcepts are skipped.  The scheme comparison ignores case and surrounding whitespace.
+    *
+    * @param annotation   -
+    * @param wantedScheme coding scheme name of interest
+    * @return codes for the wanted scheme, without repeats
+    */
+   static private Collection<String> getCodes( final IdentifiedAnnotation annotation,
+                                               final String wantedScheme ) {
+      final Collection<String> codes = new HashSet<>();
+      for ( FeatureStructure featureStructure : getOntologyConcepts( annotation ) ) {
+         // instanceof guard rather than a blind cast: a null element or a feature structure of
+         // another type is ignored instead of causing an exception
+         if ( !(featureStructure instanceof OntologyConcept)
+              || featureStructure instanceof UmlsConcept ) {
+            continue;
+         }
+         final OntologyConcept ontologyConcept = (OntologyConcept)featureStructure;
+         final String code = ontologyConcept.getCode();
+         if ( code == null || code.isEmpty() ) {
+            continue;
+         }
+         final String codingScheme = ontologyConcept.getCodingScheme();
+         if ( codingScheme != null && wantedScheme.equalsIgnoreCase( codingScheme.trim() ) ) {
+            codes.add( code );
+         }
+      }
+      return codes;
+   }
+
+
+}

Added: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/SourceMetadataUtil.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/SourceMetadataUtil.java?rev=1650442&view=auto
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/SourceMetadataUtil.java
(added)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/util/SourceMetadataUtil.java
Fri Jan  9 05:33:10 2015
@@ -0,0 +1,121 @@
+package org.apache.ctakes.core.util;
+
+import org.apache.ctakes.typesystem.type.structured.Metadata;
+import org.apache.ctakes.typesystem.type.structured.SourceData;
+import org.apache.log4j.Logger;
+import org.apache.uima.cas.FSIterator;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.cas.TOP;
+import org.apache.uima.resource.ResourceProcessException;
+
+import java.sql.Timestamp;
+
+/**
+ * Utility class with convenience methods for a few commonly-used JCas Metadata types that
+ * are requested of the source
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 1/8/2015
+ */
+final public class SourceMetadataUtil {
+
+   static private final Logger LOGGER = Logger.getLogger( "SourceMetadataUtil" );
+
+   private SourceMetadataUtil() {
+   }
+
+
+   /**
+    * @param jcas ye olde jay-cas
+    * @return the patient id for the source or -1 if one is not found
+    */
+   static public long getPatientNum( final JCas jcas ) {
+      final Metadata metadata = getMetadata( jcas );
+      if ( metadata == null ) {
+         return -1;
+      }
+      return metadata.getPatientID();
+   }
+
+   /**
+    * @param jcas ye olde jay-cas
+    * @return the Metadata for the given jcas or null if one is not found
+    */
+   static private Metadata getMetadata( final JCas jcas ) {
+      // TODO I really dislike this index-everything-to-java1 paradigm
+      final FSIterator<TOP> itr = jcas.getJFSIndexRepository().getAllIndexedFS( Metadata.type
);
+      if ( itr == null || !itr.hasNext() ) {
+         return null;
+      }
+      return (Metadata)itr.next();
+   }
+
+   /**
+    * The first step in utilizing SourceData is getting it!
+    *
+    * @param jcas ye olde jay-cas
+    * @return the metadata for the source associated with the jcas or null if one is not
found
+    */
+   static public SourceData getSourceData( final JCas jcas ) {
+      final Metadata metadata = getMetadata( jcas );
+      if ( metadata == null ) {
+         return null;
+      }
+      return metadata.getSourceData();
+   }
+
+   /**
+    * Reads the source instance id of the given source data and parses it as a long.
+    *
+    * @param sourcedata -
+    * @return the instance id or -1 if there isn't one (null or empty)
+    * @throws ResourceProcessException if the internal value is not parseable as long
+    */
+   static public long getInstanceNum( final SourceData sourcedata ) throws ResourceProcessException {
+      final String instance = sourcedata.getSourceInstanceId();
+      if ( instance == null || instance.isEmpty() ) {
+         return -1;
+      }
+      long instanceNum;
+      try {
+         instanceNum = Long.parseLong( instance );
+      } catch ( NumberFormatException nfE ) {
+         // thrown by Long.parseLong
+         throw new ResourceProcessException( nfE );
+      }
+      return instanceNum;
+   }
+
+   /**
+    * @param sourcedata -
+    * @return the encounter id
+    * @throws ResourceProcessException if the encounter id does not exist or is not parseable
as an int
+    */
+   static public int getEncounterNum( final SourceData sourcedata ) throws ResourceProcessException
{
+      final String encounter = sourcedata.getSourceEncounterId();
+      int encounterNum;
+      try {
+         encounterNum = Integer.parseInt( encounter );
+      } catch ( NumberFormatException nfE ) {
+         // thrown by Integer.parseInt
+         throw new ResourceProcessException( nfE );
+      }
+      return encounterNum;
+   }
+
+   /**
+    * @param sourcedata -
+    * @return the author specialty
+    */
+   static public String getProviderId( final SourceData sourcedata ) {
+      return sourcedata.getAuthorSpecialty();
+   }
+
+   /**
+    * Converts the source original date text of the given source data with Timestamp.valueOf(..).
+    * Nothing is caught here: per the JDK documentation Timestamp.valueOf(..) throws an unchecked
+    * IllegalArgumentException for text that is not in its JDBC timestamp escape format.
+    * NOTE(review): a source without an original date presumably ends up in that case as well -
+    * confirm what callers should see.
+    *
+    * @param sourcedata -
+    * @return the original date for the source
+    */
+   static public Timestamp getStartDate( final SourceData sourcedata ) {
+      final String sourceDate = sourcedata.getSourceOriginalDate();
+      return Timestamp.valueOf( sourceDate );
+   }
+
+}



Mime
View raw message