ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From tm...@apache.org
Subject svn commit: r1485228 - in /ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes: assertion/eval/ assertion/medfacts/ assertion/medfacts/cleartk/ assertion/medfacts/cleartk/extractors/ relationextractor/cr/
Date Wed, 22 May 2013 14:34:14 GMT
Author: tmill
Date: Wed May 22 14:34:13 2013
New Revision: 1485228

URL: http://svn.apache.org/r1485228
Log:
addresses ctakes-154: Adds tree fragment features core classes. Some cleanup of eval code
(removing comments, removing unused features).

Added:
    ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/extractors/TreeFragmentFeatureExtractor.java
  (with props)
Removed:
    ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/relationextractor/cr/Mapper.java
Modified:
    ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java
    ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/AssertionAnalysisEngine.java
    ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/AssertionCleartkAnalysisEngine.java
    ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/PolarityCleartkAnalysisEngine.java

Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java?rev=1485228&r1=1485227&r2=1485228&view=diff
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java
(original)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java
Wed May 22 14:34:13 2013
@@ -31,6 +31,7 @@ import java.util.Map;
 import java.util.Set;
 import java.util.TreeMap;
 
+import org.apache.ctakes.assertion.medfacts.cleartk.AlternateCuePhraseAnnotator;
 import org.apache.ctakes.assertion.medfacts.cleartk.AssertionCleartkAnalysisEngine;
 import org.apache.ctakes.assertion.medfacts.cleartk.AssertionComponents;
 import org.apache.ctakes.assertion.medfacts.cleartk.ConditionalCleartkAnalysisEngine;
@@ -74,9 +75,11 @@ import org.apache.uima.resource.metadata
 import org.apache.uima.util.CasCopier;
 import org.cleartk.classifier.CleartkAnnotator;
 import org.cleartk.classifier.DataWriterFactory;
+import org.cleartk.classifier.jar.DefaultDataWriterFactory;
 import org.cleartk.classifier.jar.DirectoryDataWriterFactory;
 import org.cleartk.classifier.jar.GenericJarClassifierFactory;
 import org.cleartk.classifier.jar.JarClassifierBuilder;
+import org.cleartk.classifier.libsvm.LIBSVMStringOutcomeDataWriter;
 import org.cleartk.classifier.opennlp.DefaultMaxentDataWriterFactory;
 import org.cleartk.eval.AnnotationStatistics;
 import org.cleartk.eval.Evaluation_ImplBase;
@@ -100,6 +103,9 @@ import com.google.common.base.Objects;
 import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
 import com.google.common.collect.Sets;
+//import org.chboston.cnlp.ctakes.relationextractor.ae.RelationExtractorAnnotator;
+//import org.chboston.cnlp.ctakes.relationextractor.eval.RelationExtractorEvaluation;
+//import org.chboston.cnlp.ctakes.relationextractor.ae.ModifierExtractorAnnotator;
 
 public class AssertionEvaluation extends Evaluation_ImplBase<File, Map<String, AnnotationStatistics>>
{
   
@@ -251,6 +257,8 @@ protected static Options options = new O
 
     // determine the type of classifier to be trained
     Class<? extends DataWriterFactory<String>> dataWriterFactoryClass = DefaultMaxentDataWriterFactory.class;
+//    Class<? extends DataWriterFactory<String>> dataWriterFactoryClass = DefaultMultiClassLIBSVMDataWriterFactory.class;
+    
     // TODO Class<? extends DataWriterFactory<String>> dataWriterFactoryClass = DefaultDataWriterFactory.class;
     //
     // A DataWriterFactory that creates a data writer from the class given by
@@ -277,6 +285,8 @@ protected static Options options = new O
         annotationTypes,
         annotatorClass,
         dataWriterFactoryClass,
+//        "-c",
+//        "1"
         "100",
         "2"
         );
@@ -456,7 +466,7 @@ public static void printScore(Map<String
             ZoneAnnotator.PARAM_SECTION_REGEX_FILE_URI,
             generalSectionRegexFileUri
             );
-    builder.add(zonerAnnotator);
+//    builder.add(zonerAnnotator);
 
     String mayoSectionRegexFileUri =
         "org/mitre/medfacts/uima/mayo_sections.xml";
@@ -465,18 +475,19 @@ public static void printScore(Map<String
             ZoneAnnotator.PARAM_SECTION_REGEX_FILE_URI,
             mayoSectionRegexFileUri
             );
-    builder.add(mayoZonerAnnotator);
+//    builder.add(mayoZonerAnnotator);
   
-    URL assertionCuePhraseLookupAnnotatorDescriptorUrl1 = this.getClass().getClassLoader().getResource("org/apache/ctakes/dictionary/lookup/AssertionCuePhraseDictionaryLookupAnnotator.xml");
-    logger.info(String.format("assertionCuePhraseLookupAnnotatorDescriptorUrl1 (slashes): %s", assertionCuePhraseLookupAnnotatorDescriptorUrl1));
-    URL assertionCuePhraseLookupAnnotatorDescriptorUrl2 = this.getClass().getClassLoader().getResource("org.apache.ctakes.dictionary.lookup.AssertionCuePhraseDictionaryLookupAnnotator.xml");
-    logger.info(String.format("assertionCuePhraseLookupAnnotatorDescriptorUrl2 (periods): %s", assertionCuePhraseLookupAnnotatorDescriptorUrl2));
-
+//    URL assertionCuePhraseLookupAnnotatorDescriptorUrl1 = this.getClass().getClassLoader().getResource("org/apache/ctakes/dictionary/lookup/AssertionCuePhraseDictionaryLookupAnnotator.xml");
+//    logger.info(String.format("assertionCuePhraseLookupAnnotatorDescriptorUrl1 (slashes): %s", assertionCuePhraseLookupAnnotatorDescriptorUrl1));
+//    URL assertionCuePhraseLookupAnnotatorDescriptorUrl2 = this.getClass().getClassLoader().getResource("org.apache.ctakes.dictionary.lookup.AssertionCuePhraseDictionaryLookupAnnotator.xml");
+//    logger.info(String.format("assertionCuePhraseLookupAnnotatorDescriptorUrl2 (periods): %s", assertionCuePhraseLookupAnnotatorDescriptorUrl2));
+//
+//    
+//    AnalysisEngineDescription cuePhraseLookupAnnotator =
+//        AnalysisEngineFactory.createAnalysisEngineDescription("org/apache/ctakes/dictionary/lookup/AssertionCuePhraseDictionaryLookupAnnotator");
+//    builder.add(cuePhraseLookupAnnotator);
+    builder.add(AnalysisEngineFactory.createPrimitiveDescription(AlternateCuePhraseAnnotator.class, new Object[]{}));
     
-    AnalysisEngineDescription cuePhraseLookupAnnotator =
-        AnalysisEngineFactory.createAnalysisEngineDescription("org/apache/ctakes/dictionary/lookup/AssertionCuePhraseDictionaryLookupAnnotator");
-    builder.add(cuePhraseLookupAnnotator);
-
     if (!options.ignorePolarity)
     {
 	    AnalysisEngineDescription polarityAnnotator = AnalysisEngineFactory.createPrimitiveDescription(PolarityCleartkAnalysisEngine.class); //,  this.additionalParamemters);
@@ -1050,9 +1061,10 @@ private void addExternalAttributeAnnotat
 private void addCleartkAttributeAnnotatorsToAggregate(File directory,
 		AggregateBuilder builder) throws UIMAException, IOException,
 		ResourceInitializationException {
-	AnalysisEngineDescription cuePhraseLookupAnnotator =
-		AnalysisEngineFactory.createAnalysisEngineDescription("org/apache/ctakes/dictionary/lookup/AssertionCuePhraseDictionaryLookupAnnotator");
-	builder.add(cuePhraseLookupAnnotator);
+//	AnalysisEngineDescription cuePhraseLookupAnnotator =
+//		AnalysisEngineFactory.createAnalysisEngineDescription("org/apache/ctakes/dictionary/lookup/AssertionCuePhraseDictionaryLookupAnnotator");
+//	builder.add(cuePhraseLookupAnnotator);
+    builder.add(AnalysisEngineFactory.createPrimitiveDescription(AlternateCuePhraseAnnotator.class, new Object[]{}));
 
 	String generalSectionRegexFileUri =
 		"org/mitre/medfacts/zoner/section_regex.xml";
@@ -1061,7 +1073,7 @@ private void addCleartkAttributeAnnotato
 				ZoneAnnotator.PARAM_SECTION_REGEX_FILE_URI,
 				generalSectionRegexFileUri
 		);
-	builder.add(zonerAnnotator);
+//	builder.add(zonerAnnotator);
 
 	String mayoSectionRegexFileUri =
 		"org/mitre/medfacts/uima/mayo_sections.xml";
@@ -1070,7 +1082,7 @@ private void addCleartkAttributeAnnotato
 				ZoneAnnotator.PARAM_SECTION_REGEX_FILE_URI,
 				mayoSectionRegexFileUri
 		);
-	builder.add(mayoZonerAnnotator);
+//	builder.add(mayoZonerAnnotator);
 
 	// RUN THE CLEARTK CLASSIFIERS
 	if (!options.ignorePolarity)

Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/AssertionAnalysisEngine.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/AssertionAnalysisEngine.java?rev=1485228&r1=1485227&r2=1485228&view=diff
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/AssertionAnalysisEngine.java
(original)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/AssertionAnalysisEngine.java
Wed May 22 14:34:13 2013
@@ -142,7 +142,6 @@ public class AssertionAnalysisEngine ext
   public void process(JCas jcas) throws AnalysisEngineProcessException
   {
     logger.debug("(logging statement) AssertionAnalysisEngine.process() BEGIN");
-    System.out.println("(stdout) AssertionAnalysisEngine.process() BEGIN");
     String contents = jcas.getDocumentText();
 
     // String tokenizedContents = tokenizeCasDocumentText(jcas);
@@ -280,8 +279,7 @@ public class AssertionAnalysisEngine ext
       //logger.info("    assertionTypeMap loop INSIDE END");
     }
     //logger.info("assertionTypeMap loop OUTSIDE AFTER!!");
-    System.out.println("(stdout) AssertionAnalysisEngine.process() END");
-    logger.info("(logging statement) AssertionAnalysisEngine.process() END");
+    logger.debug("(logging statement) AssertionAnalysisEngine.process() END");
   }
 
   public static void mapI2B2AssertionValueToCtakes(String assertionType,

Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/AssertionCleartkAnalysisEngine.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/AssertionCleartkAnalysisEngine.java?rev=1485228&r1=1485227&r2=1485228&view=diff
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/AssertionCleartkAnalysisEngine.java
(original)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/AssertionCleartkAnalysisEngine.java
Wed May 22 14:34:13 2013
@@ -19,7 +19,6 @@
 package org.apache.ctakes.assertion.medfacts.cleartk;
 
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.Collection;
 import java.util.List;
 import java.util.Map;
@@ -45,19 +44,12 @@ import org.cleartk.classifier.CleartkAnn
 import org.cleartk.classifier.Feature;
 import org.cleartk.classifier.Instance;
 import org.cleartk.classifier.feature.extractor.CleartkExtractor;
-import org.cleartk.classifier.feature.extractor.ContextExtractor;
-import org.cleartk.classifier.feature.extractor.ContextExtractor.Following;
-import org.cleartk.classifier.feature.extractor.ContextExtractor.Preceding;
 import org.cleartk.classifier.feature.extractor.simple.CombinedExtractor;
 import org.cleartk.classifier.feature.extractor.simple.CoveredTextExtractor;
 import org.cleartk.classifier.feature.extractor.simple.SimpleFeatureExtractor;
-import org.cleartk.classifier.feature.extractor.simple.SpannedTextExtractor;
 import org.cleartk.classifier.feature.extractor.simple.TypePathExtractor;
-import org.cleartk.classifier.feature.proliferate.CapitalTypeProliferator;
-import org.cleartk.classifier.feature.proliferate.CharacterNGramProliferator;
-import org.cleartk.classifier.feature.proliferate.LowerCaseProliferator;
-import org.cleartk.classifier.feature.proliferate.NumericTypeProliferator;
-import org.cleartk.classifier.feature.proliferate.ProliferatingExtractor;
+import org.cleartk.classifier.feature.function.FeatureFunctionExtractor;
+import org.cleartk.classifier.feature.function.LowerCaseFeatureFunction;
 import org.uimafit.descriptor.ConfigurationParameter;
 import org.uimafit.factory.AnalysisEngineFactory;
 import org.uimafit.factory.ConfigurationParameterFactory;
@@ -117,14 +109,17 @@ public abstract class AssertionCleartkAn
 	
 	
 //private SimpleFeatureExtractor tokenFeatureExtractor;
-  protected List<ContextExtractor<IdentifiedAnnotation>> contextFeatureExtractors;
-  protected List<ContextExtractor<BaseToken>> tokenContextFeatureExtractors;
+//  protected List<ContextExtractor<IdentifiedAnnotation>> contextFeatureExtractors;
+//  protected List<ContextExtractor<BaseToken>> tokenContextFeatureExtractors;
+  protected List<CleartkExtractor> contextFeatureExtractors;
+  protected List<CleartkExtractor> tokenContextFeatureExtractors;
   protected List<CleartkExtractor> tokenCleartkExtractors;
   protected List<SimpleFeatureExtractor> entityFeatureExtractors;
 
   protected CleartkExtractor cuePhraseInWindowExtractor;
   
-  @SuppressWarnings("deprecation")
+  @Override
+@SuppressWarnings("deprecation")
   public void initialize(UimaContext context) throws ResourceInitializationException {
     super.initialize(context);
     
@@ -133,7 +128,7 @@ public abstract class AssertionCleartkAn
     }
     
     // alias for NGram feature parameters
-    int fromRight = CharacterNGramProliferator.RIGHT_TO_LEFT;
+//    int fromRight = CharacterNGramProliferator.RIGHT_TO_LEFT;
 
     // a list of feature extractors that require only the token:
     // the stem of the word, the text of the word itself, plus
@@ -141,13 +136,14 @@ public abstract class AssertionCleartkAn
     this.entityFeatureExtractors = new ArrayList<SimpleFeatureExtractor>();
     
     // a list of feature extractors that require the token and the sentence
-    this.contextFeatureExtractors = new ArrayList<ContextExtractor<IdentifiedAnnotation>>();
+//    this.contextFeatureExtractors = new ArrayList<CleartkExtractor>();
     
     this.tokenCleartkExtractors = new ArrayList<CleartkExtractor>();
 
     CleartkExtractor tokenExtraction1 = 
     		new CleartkExtractor(
     				BaseToken.class, 
+//    				new FeatureFunctionExtractor(new CoveredTextExtractor(), new LowerCaseFeatureFunction()),
     				new CoveredTextExtractor(),
     				//new CleartkExtractor.Covered(),
     				new CleartkExtractor.LastCovered(2),
@@ -161,24 +157,23 @@ public abstract class AssertionCleartkAn
             new CleartkExtractor.Bag(new CleartkExtractor.Following(10))
     				);
     
-    CleartkExtractor posExtraction1 = 
-    		new CleartkExtractor(
-    				BaseToken.class,
-    				new TypePathExtractor(BaseToken.class, "partOfSpeech"),
-    				new CleartkExtractor.LastCovered(2),
-    				new CleartkExtractor.Preceding(3),
-    				new CleartkExtractor.Following(2)
-    				);
+//    CleartkExtractor posExtraction1 = 
+//    		new CleartkExtractor(
+//    				BaseToken.class,
+//    				new TypePathExtractor(BaseToken.class, "partOfSpeech"),
+//    				new CleartkExtractor.LastCovered(2),
+//    				new CleartkExtractor.Preceding(3),
+//    				new CleartkExtractor.Following(2)
+//    				);
 
     this.tokenCleartkExtractors.add(tokenExtraction1);
     //this.tokenCleartkExtractors.add(posExtraction1);
     
-    this.contextFeatureExtractors.add(new ContextExtractor<IdentifiedAnnotation>(
-        IdentifiedAnnotation.class,
-        new CoveredTextExtractor(),
-        //new TypePathExtractor(IdentifiedAnnotation.class, "stem"),
-        new Preceding(2),
-        new Following(2)));
+//    this.contextFeatureExtractors.add(new CleartkExtractor(IdentifiedAnnotation.class,
+//        new CoveredTextExtractor(),
+//        //new TypePathExtractor(IdentifiedAnnotation.class, "stem"),
+//        new Preceding(2),
+//        new Following(2)));
     
     // stab at dependency-based features
     //List<Feature> features = new ArrayList<Feature>();
@@ -275,26 +270,6 @@ public abstract class AssertionCleartkAn
 
       // extract all features that require the token and sentence annotations
 
-      /*** Commented by SWU 01/24/13 -- doesn't seem to be used
-      Collection<Sentence> sentenceList = coveringSentenceMap.get(entityMention);
-      Sentence sentence = null;
-      if (sentenceList == null || sentenceList.isEmpty())
-      {
-        String message = "no surrounding sentence found";
-        Exception runtimeException = new RuntimeException(message);
-        AnalysisEngineProcessException aeException = new AnalysisEngineProcessException(runtimeException);
-        logger.log(Level.ERROR, message);
-      } else if (sentenceList.size() > 1)
-      {
-        String message = "more than one surrounding sentence found";
-        Exception runtimeException = new RuntimeException(message);
-        AnalysisEngineProcessException aeException = new AnalysisEngineProcessException(runtimeException);
-        logger.log(Level.ERROR, message);
-      } else
-      {
-        sentence = sentenceList.iterator().next();
-      }
-      */
       //Sentence sentence = sentenceList.iterator().next();
       
       /*
@@ -320,8 +295,8 @@ public abstract class AssertionCleartkAn
     	  instance.addAll(extractor.extract(identifiedAnnotationView, entityOrEventMention));
         }
       
-      List<Feature> cuePhraseFeatures =
-          cuePhraseInWindowExtractor.extract(jCas, entityOrEventMention);
+      List<Feature> cuePhraseFeatures = null;
+//          cuePhraseInWindowExtractor.extract(jCas, entityOrEventMention);
           //cuePhraseInWindowExtractor.extractWithin(jCas, entityMention, firstCoveringSentence);
       
       if (cuePhraseFeatures != null && !cuePhraseFeatures.isEmpty())
@@ -340,9 +315,20 @@ public abstract class AssertionCleartkAn
       List<Feature> zoneFeatures = extractZoneFeatures(coveringZoneMap, entityOrEventMention);
       if (zoneFeatures != null && !zoneFeatures.isEmpty())
       {
-        instance.addAll(zoneFeatures);
+//        instance.addAll(zoneFeatures);
+      }
+      
+      List<Feature> feats = instance.getFeatures();
+//      List<Feature> lcFeats = new ArrayList<Feature>();
+      
+      for(Feature feat : feats){
+    	  if(feat.getName() != null && (feat.getName().startsWith("TreeFrag") || feat.getName().startsWith("WORD") || feat.getName().startsWith("NEG"))) continue;
+    	  if(feat.getValue() instanceof String){
+    		  feat.setValue(((String)feat.getValue()).toLowerCase());
+//    		  lcFeats.add(new Feature("LC_" + feat.getName(), ((String)feat.getValue()).toLowerCase()));
+    	  }
       }
-       
+//      instance.addAll(lcFeats);
 
       setClassLabel(entityOrEventMention, instance);
       
@@ -356,7 +342,6 @@ public abstract class AssertionCleartkAn
     
     if (zoneList == null || zoneList.isEmpty())
     {
-      //return null;
       //logger.info("AssertionCleartkAnalysisEngine.extractZoneFeatures() early END (no zones)");
       return new ArrayList<Feature>();
     } else

Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/PolarityCleartkAnalysisEngine.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/PolarityCleartkAnalysisEngine.java?rev=1485228&r1=1485227&r2=1485228&view=diff
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/PolarityCleartkAnalysisEngine.java
(original)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/PolarityCleartkAnalysisEngine.java
Wed May 22 14:34:13 2013
@@ -22,6 +22,7 @@ import java.util.ArrayList;
 
 import org.apache.ctakes.assertion.medfacts.cleartk.extractors.ContextWordWindowExtractor;
 import org.apache.ctakes.assertion.medfacts.cleartk.extractors.NegationDependencyFeatureExtractor;
+import org.apache.ctakes.assertion.medfacts.cleartk.extractors.TreeFragmentFeatureExtractor;
 import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
 import org.apache.uima.UimaContext;
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
@@ -35,15 +36,17 @@ public class PolarityCleartkAnalysisEngi
 	@Override
 	public void initialize(UimaContext context) throws ResourceInitializationException {
 		super.initialize(context);
-		probabilityOfKeepingADefaultExample = 0.1;
+		probabilityOfKeepingADefaultExample = 1.0; //0.1;
 		
 		if(this.entityFeatureExtractors == null){
 			this.entityFeatureExtractors = new ArrayList<SimpleFeatureExtractor>();
 		}
 		this.entityFeatureExtractors.add(new NegationDependencyFeatureExtractor());
 		this.entityFeatureExtractors.add(new ContextWordWindowExtractor("org/apache/ctakes/assertion/models/polarity.txt"));
+		this.entityFeatureExtractors.add(new TreeFragmentFeatureExtractor("org/apache/ctakes/assertion/models/sharpPolarityFrags.txt"));
 	}
 
+	@Override
 	public void setClassLabel(IdentifiedAnnotation entityOrEventMention, Instance<String> instance) throws AnalysisEngineProcessException {
 	      if (this.isTraining())
 	      {

Added: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/extractors/TreeFragmentFeatureExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/extractors/TreeFragmentFeatureExtractor.java?rev=1485228&view=auto
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/extractors/TreeFragmentFeatureExtractor.java
(added)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/extractors/TreeFragmentFeatureExtractor.java
Wed May 22 14:34:13 2013
@@ -0,0 +1,91 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.assertion.medfacts.cleartk.extractors;
+
+import static org.apache.ctakes.assertion.util.AssertionTreeUtils.extractAboveLeftConceptTree;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Scanner;
+
+import org.apache.ctakes.assertion.util.SemanticClasses;
+import org.apache.ctakes.constituency.parser.util.TreeUtils;
+import org.apache.ctakes.core.resource.FileLocator;
+import org.apache.ctakes.utils.tree.FragmentUtils;
+import org.apache.ctakes.utils.tree.SimpleTree;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.cleartk.classifier.Feature;
+import org.cleartk.classifier.feature.extractor.CleartkExtractorException;
+import org.cleartk.classifier.feature.extractor.simple.SimpleFeatureExtractor;
+import org.cleartk.util.CleartkInitializationException;
+
+/* 
+ * This class implements a ClearTK feature extractor for tree kernel fragment features
+ * as derived using the flink toolkit (http://danielepighin.net/cms/software/flink).
+ * Model location is hardcoded as of right now.
+ * TODO: Parameterize & unstaticize this so that, e.g., multiple projects could use this feature if necessary.
+ */
+public class TreeFragmentFeatureExtractor implements SimpleFeatureExtractor {
+	public static final String PARAM_OUTPUTDIR = "outputDir"; // NOTE(review): not referenced anywhere in this class
+	public static final String PARAM_SEMDIR = "semDir"; // NOTE(review): not referenced anywhere in this class
+	static HashSet<SimpleTree> frags = null; // shared by all instances; filled once, by the first constructor call
+	private SemanticClasses sems = null; // per-instance; loaded in the constructor from a fixed resource path
+
+	public TreeFragmentFeatureExtractor(String resourceFilename) throws CleartkInitializationException{
+		if(frags == null) initializeFrags(resourceFilename); // later instances reuse the first-loaded set, even if resourceFilename differs
+		try{
+			sems = new SemanticClasses(FileLocator.locateFile("org/apache/ctakes/assertion/semantic_classes").getAbsolutePath());
+		}catch(Exception e){ // any failure while locating/loading the resource is rethrown as an initialization error
+			throw new CleartkInitializationException(e, "org/apache/ctakes/assertion/semantic_classes",
"Could not find semantic classes resource.", new Object[]{});
+		}
+	}
+
+	private static void initializeFrags(String resourceFilename){
+		frags = new HashSet<SimpleTree>(); // assigned before loading, so a failed load leaves an empty (non-null) set
+		try{
+			File fragsFile = FileLocator.locateFile(resourceFilename);
+			Scanner scanner = new Scanner(fragsFile); // NOTE(review): scanner is never closed
+			while(scanner.hasNextLine()){
+				frags.add(FragmentUtils.frag2tree(scanner.nextLine().trim())); // one fragment per line, trimmed before parsing
+			}
+		}catch(FileNotFoundException e){ // NOTE(review): a missing file is only reported on stderr; no exception reaches the caller
+			System.err.println("Missing fragment file!");
+		}
+	}
+
+	@Override
+	public List<Feature> extract(JCas jcas, Annotation annotation)
+			throws CleartkExtractorException {
+		List<Feature> features = new ArrayList<Feature>();
+		SimpleTree tree = extractAboveLeftConceptTree(jcas, annotation, sems); // helper statically imported from AssertionTreeUtils
+		
+		for(SimpleTree frag : frags){
+			if(TreeUtils.containsIgnoreCase(tree, frag)){ // presumably a case-insensitive containment test - confirm in TreeUtils
+				features.add(new Feature("TreeFrag", frag.toString())); // every match shares the name "TreeFrag"; the value is the fragment's string form
+			}
+		}
+	
+		return features; // empty when no fragment matched or no fragments were loaded
+	}
+
+}

Propchange: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/extractors/TreeFragmentFeatureExtractor.java
------------------------------------------------------------------------------
    svn:executable = *

Propchange: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/extractors/TreeFragmentFeatureExtractor.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain



Mime
View raw message