ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From seanfi...@apache.org
Subject svn commit: r1764529 - in /ctakes/trunk: ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/ ctakes-examples/src/main/java/org/apache/ctakes/examples/pipelines/ ctakes-examples/src/main/resources/org/apache/ctakes/examples/pipeline/ ctakes-examp...
Date Wed, 12 Oct 2016 19:50:39 GMT
Author: seanfinan
Date: Wed Oct 12 19:50:39 2016
New Revision: 1764529

URL: http://svn.apache.org/viewvc?rev=1764529&view=rev
Log:
PipelineReader uses key=value key=value ...  for setting component parameters
PipelineReader has addDescription command to utilize static .createAnnotatorDescription()
Renaming of example pipeline runners

Added:
    ctakes/trunk/ctakes-examples/src/main/resources/org/apache/ctakes/examples/pipeline/
      - copied from r1764190, ctakes/trunk/ctakes-examples/src/main/resources/org/apache/ctakes/examples/pipelines/
    ctakes/trunk/ctakes-examples/src/main/resources/org/apache/ctakes/examples/pipeline/HelloWorldPipeline.txt
      - copied, changed from r1764190, ctakes/trunk/ctakes-examples/src/main/resources/org/apache/ctakes/examples/pipelines/ExamplePipeline1.txt
Removed:
    ctakes/trunk/ctakes-examples/src/main/java/org/apache/ctakes/examples/pipelines/
    ctakes/trunk/ctakes-examples/src/main/resources/org/apache/ctakes/examples/pipeline/ExamplePipeline1.txt
    ctakes/trunk/ctakes-examples/src/main/resources/org/apache/ctakes/examples/pipeline/ExamplePipeline2.txt
    ctakes/trunk/ctakes-examples/src/main/resources/org/apache/ctakes/examples/pipelines/
Modified:
    ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/PipelineReader.java
    ctakes/trunk/ctakes-regression-test/src/test/java/org/apache/ctakes/regression/test/RegressionPipelineTest.java
    ctakes/trunk/ctakes-relation-extractor/desc/analysis_engine/RelationExtractorPreprocessor.xml
    ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/metastasis/MetastasisXmiGenerationPipeline.java

Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/PipelineReader.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/PipelineReader.java?rev=1764529&r1=1764528&r2=1764529&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/PipelineReader.java
(original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/pipeline/PipelineReader.java
Wed Oct 12 19:50:39 2016
@@ -7,6 +7,7 @@ import org.apache.ctakes.core.resource.F
 import org.apache.log4j.Logger;
 import org.apache.uima.UIMAException;
 import org.apache.uima.analysis_component.AnalysisComponent;
+import org.apache.uima.analysis_engine.AnalysisEngineDescription;
 import org.apache.uima.collection.CollectionReader;
 import org.apache.uima.resource.ResourceInitializationException;
 
@@ -15,30 +16,32 @@ import java.io.IOException;
 import java.io.InputStreamReader;
 import java.lang.reflect.Constructor;
 import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.regex.Pattern;
 
 /**
  * Creates a pipeline (PipelineBuilder) from specifications in a flat plaintext file.
- * <p>
+ *
  * <p>There are several basic commands:
  * addPackage <i>user_package_name</i>
  * loadParameters <i>path_to_properties_file_with_ae_parameters</i>
- * addParameters <i>ae_parameter_name</i>|<i>ae_parameter_value</i>|
...
+ * addParameters <i>ae_parameter_name=ae_parameter_value e_parameter_name=ae_parameter_value</i>
...
  * reader <i>collection_reader_class_name</i>
  * readFiles <i>input_directory</i>
- * <i>input_directory</i> can be empty if {@link FilesInDirectoryCollectionReader#PARAM_INPUTDIR} was specified
- * add <i>ae_or_cc_class_name</i>
- * addLogged <i>ae_or_cc_class_name</i>
+ *    <i>input_directory</i> can be empty if
+ *    {@link FilesInDirectoryCollectionReader#PARAM_INPUTDIR} ("InputDirectory") was specified
+ * add <i>ae_or_cc_class_name ae_parameter_name=ae_parameter_value e_parameter_name<=ae_parameter_value</i>
...
+ * addLogged <i>ae_or_cc_class_name ae_parameter_name=ae_parameter_value e_parameter_name=ae_parameter_value</i>
...
+ * addDescription <i>ae_or_cc_class_name</i>
  * collectCuis
  * collectEntities
  * writeXmis <i>output_directory</i>
- * <i>output_directory</i> can be empty if {@link XmiWriterCasConsumerCtakes#PARAM_OUTPUTDIR} was specified
- * <p>
+ *    <i>output_directory</i> can be empty if
+ *    {@link XmiWriterCasConsumerCtakes#PARAM_OUTPUTDIR} ("OutputDirectory") was specified
  * # and // may be used to mark line comments
  * </p>
- * <p>
  * class names must be fully-specified with package unless they are in standard ctakes cr ae or cc packages,
  * or in a package specified by an earlier addPackage command.
  *
@@ -74,7 +77,8 @@ final public class PipelineReader {
 
    static private final Object[] EMPTY_OBJECT_ARRAY = new Object[ 0 ];
 
-   static private final Pattern SPLIT_PATTERN = Pattern.compile( "\\|" );
+   static private final Pattern SPACE_PATTERN = Pattern.compile( "\\s+" );
+   static private final Pattern KEY_VALUE_PATTERN = Pattern.compile( "=" );
 
    private PipelineBuilder _builder;
 
@@ -117,7 +121,7 @@ final public class PipelineReader {
                continue;
             }
             final int spaceIndex = line.indexOf( ' ' );
-            if ( spaceIndex < 3 ) {
+            if ( spaceIndex < 0 ) {
                addToPipeline( line, "" );
             } else {
                addToPipeline( line.substring( 0, spaceIndex ), line.substring( spaceIndex + 1 ).trim() );
@@ -136,6 +140,7 @@ final public class PipelineReader {
       return _builder;
    }
 
+
    /**
     * @param command   specified by first word in the file line
     * @param parameter specified by second word in the file line
@@ -150,7 +155,7 @@ final public class PipelineReader {
             _builder.loadParameters( parameter );
             break;
          case "addParameters":
-            _builder.addParameters( getStrings( parameter ) );
+            _builder.addParameters( splitParameters( parameter ) );
             break;
          case "reader":
             _builder.reader( createReader( parameter ) );
@@ -163,15 +168,34 @@ final public class PipelineReader {
             }
             break;
          case "add":
-            _builder.add( getComponentClass( parameter ) );
+            if ( hasParameters( parameter ) ) {
+               final String[] component_parameters = splitFromParameters( parameter );
+               final String component = component_parameters[ 0 ];
+               final Object[] parameters = splitParameters( component_parameters[ 1 ] );
+               _builder.add( getComponentClass( component ), parameters );
+            } else {
+               _builder.add( getComponentClass( parameter ) );
+            }
             break;
          case "addLogged":
-            _builder.addLogged( getComponentClass( parameter ) );
+            if ( hasParameters( parameter ) ) {
+               final String[] component_parameters = splitFromParameters( parameter );
+               final String component = component_parameters[ 0 ];
+               final Object[] parameters = splitParameters( component_parameters[ 1 ] );
+               _builder.addLogged( getComponentClass( component ), parameters );
+            } else {
+               _builder.addLogged( getComponentClass( parameter ) );
+            }
+            break;
+         case "addDescription":
+            final AnalysisEngineDescription description = createDescription( parameter );
+            _builder.addDescription( description );
             break;
+
          case "collectCuis":
             _builder.collectCuis();
             break;
-         case "collectEntites":
+         case "collectEntities":
             _builder.collectEntities();
             break;
          case "writeXmis":
@@ -230,11 +254,48 @@ final public class PipelineReader {
          if ( componentClass != null ) {
             return componentClass;
          }
+         componentClass = getPackagedClass(
+               "org.apache.ctakes." + packageName, className, AnalysisComponent.class );
+         if ( componentClass != null ) {
+            return componentClass;
+         }
       }
       return null;
    }
 
    /**
+    * This requires that the component class has a static createAnnotatorDescription method with no parameters
+    * @param className component class for which a descriptor should be created
+    * @return a description generated for the component
+    * @throws ResourceInitializationException if anything went wrong with finding the class or the method,
+    * or invoking the method to get an AnalysisEngineDescription
+    */
+   private AnalysisEngineDescription createDescription( final String className )
+         throws ResourceInitializationException {
+      final Class<? extends AnalysisComponent> componentClass = getComponentClass( className );
+      Method method;
+      try {
+         method = componentClass.getMethod( "createAnnotatorDescription" );
+      } catch ( NoSuchMethodException nsmE ) {
+         LOGGER.error( "No createAnnotatorDescription method in " + className );
+         throw new ResourceInitializationException( nsmE );
+      }
+      try {
+         final Object invocation = method.invoke( null );
+         if ( !AnalysisEngineDescription.class.isInstance( invocation ) ) {
+            LOGGER.error( "createAnnotatorDescription in " + className + " returned an "
+                          + invocation.getClass().getName() + " not an AnalysisEngineDescription" );
+            throw new ResourceInitializationException();
+         }
+         return (AnalysisEngineDescription)invocation;
+      } catch ( IllegalAccessException | InvocationTargetException multE ) {
+         LOGGER.error( "Could not invoke createAnnotatorDescription on " + className );
+         throw new ResourceInitializationException( multE );
+      }
+   }
+
+
+   /**
     * @param className fully-specified or simple name of a cr Collection Reader class
     * @return instantiated collection reader
     * @throws ResourceInitializationException if the class could not be found or instantiated
@@ -282,6 +343,11 @@ final public class PipelineReader {
          if ( readerClass != null ) {
             return readerClass;
          }
+         readerClass = getPackagedClass(
+               "org.apache.ctakes." + packageName, className, CollectionReader.class );
+         if ( readerClass != null ) {
+            return readerClass;
+         }
       }
       return null;
    }
@@ -328,12 +394,90 @@ final public class PipelineReader {
    }
 
    /**
-    * @param parameter text
-    * @return array created by splitting text at '|' characters
+    *
+    * @param text -
+    * @return true if there is more than one word in the text
     */
-   static private String[] getStrings( final String parameter ) {
-      return SPLIT_PATTERN.split( parameter );
+   static private boolean hasParameters( final String text ) {
+      return SPACE_PATTERN.split( text ).length > 1;
    }
 
+   /**
+    * @param text text with more than one word
+    * @return an array of two strings, [0]= the first word, [1]= the remaining words separated by spaces
+    */
+   static private String[] splitFromParameters( final String text ) {
+      final String[] allSplits = SPACE_PATTERN.split( text );
+      final String[] returnSplits = new String[ 2 ];
+      returnSplits[ 0 ] = allSplits[ 0 ];
+      String parameters = allSplits[ 1 ];
+      for ( int i = 2; i < allSplits.length; i++ ) {
+         parameters += " " + allSplits[ i ];
+      }
+      returnSplits[ 1 ] = parameters;
+      return returnSplits;
+   }
+
+   /**
+    * @param text -
+    * @return array created by splitting text ' ' and then at '=' characters
+    */
+   static private Object[] splitParameters( final String text ) {
+      if ( text == null || text.trim().isEmpty() ) {
+         return EMPTY_OBJECT_ARRAY;
+      }
+      final String[] pairs = SPACE_PATTERN.split( text.trim() );
+      final Object[] keysAndValues = new Object[ pairs.length * 2 ];
+      int i = 0;
+      for ( String pair : pairs ) {
+         final String[] keyAndValue = KEY_VALUE_PATTERN.split( pair );
+         keysAndValues[ i ] = keyAndValue[ 0 ];
+         if ( keyAndValue.length == 1 ) {
+            keysAndValues[ i + 1 ] = "";
+         } else if ( keyAndValue.length > 2 ) {
+            LOGGER.warn( "Multiple parameter values, using first of " + pair );
+         }
+         keysAndValues[ i + 1 ] = getValueObject( keyAndValue[ 1 ] );
+         i += 2;
+      }
+      return keysAndValues;
+   }
+
+   static private Object getValueObject( final String value ) {
+      final Object returner = attemptParseBoolean( value );
+      if ( !value.equals( returner ) ) {
+         return returner;
+      }
+      return attemptParseInt( value );
+   }
+
+   /**
+    * Since uimafit parameter values can be integers, check for an integer value
+    *
+    * @param value String value parsed from file
+    * @return the value as an Integer, or the original String if an Integer could not be resolved
+    */
+   static private Object attemptParseInt( final String value ) {
+      try {
+         return Integer.valueOf( value );
+      } catch ( NumberFormatException nfE ) {
+         return value;
+      }
+   }
+
+   /**
+    * Since uimafit parameter values can be boolean, check for a boolean value
+    *
+    * @param value String value parsed from file
+    * @return the value as a Boolean, or the original String if it is not "true" or "false", case insensitive
+    */
+   static private Object attemptParseBoolean( final String value ) {
+      if ( value.equalsIgnoreCase( "true" ) ) {
+         return Boolean.TRUE;
+      } else if ( value.equalsIgnoreCase( "false" ) ) {
+         return Boolean.FALSE;
+      }
+      return value;
+   }
 
 }

Copied: ctakes/trunk/ctakes-examples/src/main/resources/org/apache/ctakes/examples/pipeline/HelloWorldPipeline.txt
(from r1764190, ctakes/trunk/ctakes-examples/src/main/resources/org/apache/ctakes/examples/pipelines/ExamplePipeline1.txt)
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-examples/src/main/resources/org/apache/ctakes/examples/pipeline/HelloWorldPipeline.txt?p2=ctakes/trunk/ctakes-examples/src/main/resources/org/apache/ctakes/examples/pipeline/HelloWorldPipeline.txt&p1=ctakes/trunk/ctakes-examples/src/main/resources/org/apache/ctakes/examples/pipelines/ExamplePipeline1.txt&r1=1764190&r2=1764529&rev=1764529&view=diff
==============================================================================
--- ctakes/trunk/ctakes-examples/src/main/resources/org/apache/ctakes/examples/pipelines/ExamplePipeline1.txt
(original)
+++ ctakes/trunk/ctakes-examples/src/main/resources/org/apache/ctakes/examples/pipeline/HelloWorldPipeline.txt
Wed Oct 12 19:50:39 2016
@@ -1,8 +1,15 @@
+// This file contains commands and parameters to run the ctakes-examples "Hello World" pipeline
+
 // Equivalent of ClinicalPipelineFactory.getTokenProcessingPipeline()
 add SimpleSegmentAnnotator
 add SentenceDetector
 add TokenizerAnnotatorPTB
 add ContextDependentTokenizerAnnotator
+// The POSTagger has a -complex- startup, but it can create its own description to handle it
+addDescription POSTagger
+
+// add the simple Hello World Annotator
+add org.apache.ctakes.examples.ae.ExampleHelloWorldAnnotator
 
-// The POSTagger has a -complex- startup and should be added manually
-# add POSTagger
+// Collect discovered entities for post-run information
+collectEntities

Modified: ctakes/trunk/ctakes-regression-test/src/test/java/org/apache/ctakes/regression/test/RegressionPipelineTest.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-regression-test/src/test/java/org/apache/ctakes/regression/test/RegressionPipelineTest.java?rev=1764529&r1=1764528&r2=1764529&view=diff
==============================================================================
--- ctakes/trunk/ctakes-regression-test/src/test/java/org/apache/ctakes/regression/test/RegressionPipelineTest.java
(original)
+++ ctakes/trunk/ctakes-regression-test/src/test/java/org/apache/ctakes/regression/test/RegressionPipelineTest.java
Wed Oct 12 19:50:39 2016
@@ -18,14 +18,6 @@
  */
 package org.apache.ctakes.regression.test;
 
-import java.io.File;
-import java.io.IOException;
-import java.util.List;
-
-import javax.xml.parsers.DocumentBuilder;
-import javax.xml.parsers.DocumentBuilderFactory;
-import javax.xml.parsers.ParserConfigurationException;
-
 import org.apache.log4j.Logger;
 import org.apache.uima.UIMAFramework;
 import org.apache.uima.cas.CAS;
@@ -42,12 +34,19 @@ import org.junit.Test;
 import org.w3c.dom.Document;
 import org.xml.sax.SAXException;
 
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.ParserConfigurationException;
+import java.io.File;
+import java.io.IOException;
+import java.util.List;
+
 /**
  * Runs a full pipeline and compares the xml output to ensure all annotators
  * work together in harmony.
  * 
  * This is designed to run all CPE's inside the
- * desc/collection_processing_engine Directory. So any new pipelines added there
+ * desc/collection_processing_engine Directory. So any new pipeline added there
  * will automatically be run and tested as long as they put the generated output
  * to expectedoutput/{nameofcpe}
  * 

Modified: ctakes/trunk/ctakes-relation-extractor/desc/analysis_engine/RelationExtractorPreprocessor.xml
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-relation-extractor/desc/analysis_engine/RelationExtractorPreprocessor.xml?rev=1764529&r1=1764528&r2=1764529&view=diff
==============================================================================
--- ctakes/trunk/ctakes-relation-extractor/desc/analysis_engine/RelationExtractorPreprocessor.xml
(original)
+++ ctakes/trunk/ctakes-relation-extractor/desc/analysis_engine/RelationExtractorPreprocessor.xml
Wed Oct 12 19:50:39 2016
@@ -61,6 +61,7 @@
     </delegateAnalysisEngine>
     <delegateAnalysisEngine key="DictionaryLookupAnnotatorDB">
       <import location="../../../ctakes-dictionary-lookup/desc/analysis_engine/DictionaryLookupAnnotatorUMLS.xml"/>
+       <!--<import location="../../../ctakes-dictionary-lookup-fast/desc/analysis_engine/UmlsLookupAnnotator.xml"/>-->
     </delegateAnalysisEngine>
     <delegateAnalysisEngine key="LookupWindowAnnotator">
       <import location="../../../ctakes-clinical-pipeline/desc/analysis_engine/LookupWindowAnnotator.xml"/>

Modified: ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/metastasis/MetastasisXmiGenerationPipeline.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/metastasis/MetastasisXmiGenerationPipeline.java?rev=1764529&r1=1764528&r2=1764529&view=diff
==============================================================================
--- ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/metastasis/MetastasisXmiGenerationPipeline.java
(original)
+++ ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/metastasis/MetastasisXmiGenerationPipeline.java
Wed Oct 12 19:50:39 2016
@@ -1,14 +1,6 @@
 package org.apache.ctakes.relationextractor.metastasis;
 
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.net.MalformedURLException;
-import java.net.URI;
-import java.util.ArrayList;
-import java.util.List;
-
+import com.google.common.io.CharStreams;
 import org.apache.ctakes.relationextractor.eval.SHARPXMI.CopyDocumentTextToGoldView;
 import org.apache.ctakes.relationextractor.eval.SHARPXMI.DocumentIDAnnotator;
 import org.apache.uima.UIMAFramework;
@@ -35,12 +27,21 @@ import org.cleartk.util.cr.UriCollection
 import org.xml.sax.ContentHandler;
 import org.xml.sax.SAXException;
 
-import com.google.common.io.CharStreams;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.net.MalformedURLException;
+import java.net.URI;
+import java.util.ArrayList;
+import java.util.List;
 
 public class MetastasisXmiGenerationPipeline {
 
-  public static final File ANAFORA_ANNOTATIONS_DIR = new File("DeepPhe/Metastasis/Anafora/All/");
-  public static final String XMI_OUTPUT_DIR = "DeepPhe/Metastasis/Xmi/All/";
+   //  public static final File ANAFORA_ANNOTATIONS_DIR = new File("DeepPhe/Metastasis/Anafora/All/");
+   public static final File ANAFORA_ANNOTATIONS_DIR
+         = new File( "\\\\rc-fs.tch.harvard.edu\\chip-nlp\\Public\\DeepPhe\\Metastasis\\Anafora\\Test" );
+   public static final String XMI_OUTPUT_DIR = "C:\\Spiffy\\prj_darth_phenome\\output\\temp\\metastatic\\Test";
   public static final String GOLD_VIEW_NAME = "GoldView";
 
   public static void main(String[] args) throws Exception {
@@ -62,8 +63,9 @@ public class MetastasisXmiGenerationPipe
 
     AggregateBuilder builder = new AggregateBuilder();
     builder.add(UriToDocumentTextAnnotator.getDescription());
-    
-    File preprocessDescFile = new File("desc/analysis_engine/RelationExtractorPreprocessor.xml");
+
+     File preprocessDescFile
+           = new File( "C:\\Spiffy\\ctakes_trunk_intellij\\dev\\apache\\ctakes-relation-extractor\\desc\\analysis_engine/RelationExtractorPreprocessor.xml" );
     XMLParser parser = UIMAFramework.getXMLParser();
     XMLInputSource source = new XMLInputSource(preprocessDescFile);
     builder.add(parser.parseAnalysisEngineDescription(source));



Mime
View raw message