ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From s..@apache.org
Subject svn commit: r1548577 - in /ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion: medfacts/cleartk/ medfacts/cleartk/extractors/ train/
Date Fri, 06 Dec 2013 16:14:25 GMT
Author: swu
Date: Fri Dec  6 16:14:24 2013
New Revision: 1548577

URL: http://svn.apache.org/r1548577
Log:
ctakes-assertion with frustratingly easy domain adaptation and associated tests. assertion-evaluation
will be updated separately

Added:
    ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/PolarityFedaCleartkAnalysisEngine.java
  (with props)
    ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/extractors/ExtractorListFeatureFunctionConverter.java
  (with props)
    ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/extractors/FedaFeatureFunction.java
  (with props)
    ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/PolarityDomainAdaptationTests.java
  (with props)
    ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/PolarityDomainAdaptationTrain.java
  (with props)
Modified:
    ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/AssertionCleartkAnalysisEngine.java

Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/AssertionCleartkAnalysisEngine.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/AssertionCleartkAnalysisEngine.java?rev=1548577&r1=1548576&r2=1548577&view=diff
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/AssertionCleartkAnalysisEngine.java
(original)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/AssertionCleartkAnalysisEngine.java
Fri Dec  6 16:14:24 2013
@@ -18,14 +18,20 @@
  */
 package org.apache.ctakes.assertion.medfacts.cleartk;
 
+import java.io.File;
 import java.net.URI;
 import java.util.ArrayList;
 import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Random;
 
+import org.apache.commons.io.FilenameUtils;
 import org.apache.ctakes.assertion.attributes.features.selection.FeatureSelection;
+import org.apache.ctakes.assertion.medfacts.cleartk.extractors.FedaFeatureFunction;
 import org.apache.ctakes.assertion.zoner.types.Zone;
 import org.apache.ctakes.typesystem.type.constants.CONST;
 import org.apache.ctakes.typesystem.type.structured.DocumentID;
@@ -50,12 +56,21 @@ import org.cleartk.classifier.feature.ex
 import org.cleartk.classifier.feature.extractor.simple.CoveredTextExtractor;
 import org.cleartk.classifier.feature.extractor.simple.SimpleFeatureExtractor;
 import org.cleartk.classifier.feature.extractor.simple.TypePathExtractor;
+import org.cleartk.classifier.feature.function.FeatureFunctionExtractor;
 import org.uimafit.descriptor.ConfigurationParameter;
 import org.uimafit.factory.AnalysisEngineFactory;
 import org.uimafit.factory.ConfigurationParameterFactory;
 import org.uimafit.util.JCasUtil;
 //import org.chboston.cnlp.ctakes.relationextractor.ae.ModifierExtractorAnnotator;
 
+
+
+import scala.actors.threadpool.Arrays;
+
+/**
+ * @author swu
+ *
+ */
 public abstract class AssertionCleartkAnalysisEngine extends
     CleartkAnnotator<String>
 {
@@ -65,6 +80,10 @@ public abstract class AssertionCleartkAn
 	
   public static int relationId; // counter for error logging
 
+  // additional parameter for domain adaptation
+  public static final String FILE_TO_DOMAIN_MAP = "mapTrainFileToDomain";
+
+
   @ConfigurationParameter(
       name = PARAM_GOLD_VIEW_NAME,
       mandatory = false,
@@ -108,6 +127,13 @@ public abstract class AssertionCleartkAn
 
   protected static final String FEATURE_SELECTION_NAME = "SelectNeighborFeatures";
 
+  @ConfigurationParameter(
+		  name = FILE_TO_DOMAIN_MAP,
+		  mandatory = false,
+		  description = "a map of filenames to their respective domains (i.e., directories that
contain them)")
+  protected String fileDomainMap;
+  protected Map<String,String> fileToDomain = new HashMap<String,String>();
+  
   protected String lastLabel;
   
   
@@ -137,7 +163,10 @@ public abstract class AssertionCleartkAn
   protected List<CleartkExtractor> tokenCleartkExtractors;
   protected List<SimpleFeatureExtractor> entityFeatureExtractors;
   protected CleartkExtractor cuePhraseInWindowExtractor;
-
+  
+  protected List<FeatureFunctionExtractor> featureFunctionExtractors;
+  protected FedaFeatureFunction ffDomainAdaptor;
+  
   protected FeatureSelection<String> featureSelection;
   
   public abstract void setClassLabel(IdentifiedAnnotation entityMention, Instance<String>
instance) throws AnalysisEngineProcessException;
@@ -151,6 +180,24 @@ public abstract class AssertionCleartkAn
   public void initialize(UimaContext context) throws ResourceInitializationException {
     super.initialize(context);
     
+    // Re-process the "directory" string for domains that were used in the data
+    if (null != fileDomainMap) {
+    	String[] dirs = fileDomainMap.split("[;:]");
+    	for (String dir : dirs) {
+    		
+    		// TODO: normalize dir to real domainId
+    		String domainId = normalizeToDomain(dir);
+    		
+    		File dataDir = new File(dir);
+    		if (dataDir.listFiles()!=null) {
+    			for (File f : dataDir.listFiles()) {
+    				fileToDomain.put( FilenameUtils.removeExtension(f.getName()), domainId );
+    			}
+        		//    	System.out.println(trainFiles.toString());
+    		}
+    	}
+    }
+    
     if (this.isTraining() && this.goldViewName == null) {
       throw new IllegalArgumentException(PARAM_GOLD_VIEW_NAME + " must be defined during
training");
     }
@@ -229,16 +276,29 @@ public abstract class AssertionCleartkAn
 //          new CleartkExtractor.Bag(new CleartkExtractor.Preceding(10)),
 //          new CleartkExtractor.Bag(new CleartkExtractor.Following(10))
           );
-    
+
+    if (!fileToDomain.isEmpty()) {
+    	// set up FeatureFunction for all the laggard, non-Extractor features
+    	ffDomainAdaptor = new FedaFeatureFunction( new ArrayList<String>(new HashSet<String>(fileToDomain.values()))
);
+    }
   }
 
   @Override
   public void process(JCas jCas) throws AnalysisEngineProcessException
   {
     DocumentID documentId = JCasUtil.selectSingle(jCas, DocumentID.class);
+    String domainId = "";
+    
+    
     if (documentId != null)
     {
       logger.debug("processing next doc: " + documentId.getDocumentID());
+
+      // set the domain to be FeatureFunction'ed into all extractors
+      if (!fileToDomain.isEmpty()) {
+    	  domainId = fileToDomain.get(documentId.getDocumentID());
+    	  ffDomainAdaptor.setDomain(domainId); // if domain is not found, no warning -- just
considers general domain
+      }
     } else
     {
       logger.warn("processing next doc (doc id is null)");
@@ -323,10 +383,14 @@ public abstract class AssertionCleartkAn
           instance.addAll(extractor.extract(identifiedAnnotationView, entityMention));
         }
         */
-      for (CleartkExtractor extractor : this.tokenCleartkExtractors) {
-          //instance.addAll(extractor.extractWithin(identifiedAnnotationView, entityMention,
sentence));
-    	  instance.addAll(extractor.extract(identifiedAnnotationView, entityOrEventMention));
-        }
+      
+      // only use extract this version if not doing domain adaptation 
+      if (ffDomainAdaptor==null) {
+    	  for (CleartkExtractor extractor : this.tokenCleartkExtractors) {
+    		  //instance.addAll(extractor.extractWithin(identifiedAnnotationView, entityMention,
sentence));
+    		  instance.addAll(extractor.extract(identifiedAnnotationView, entityOrEventMention));
+    	  }
+      }
       
 //      List<Feature> cuePhraseFeatures = null;
 //          cuePhraseInWindowExtractor.extract(jCas, entityOrEventMention);
@@ -351,6 +415,14 @@ public abstract class AssertionCleartkAn
 //          instance.add(new Feature("ClosestCue_Phrase", closestCue.getCuePhrase()));
           instance.add(new Feature("ClosestCue_PhraseFamily", closestCue.getCuePhraseAssertionFamily()));
           instance.add(new Feature("ClosestCue_PhraseCategory", closestCue.getCuePhraseCategory()));
+          
+          // add hack-ey domain adaptation to these hacked-in features
+          if (!fileToDomain.isEmpty() && ffDomainAdaptor!=null) {
+        	  instance.addAll(ffDomainAdaptor.apply(new Feature("ClosestCue_Word", closestCue.getCoveredText())));
+        	  instance.addAll(ffDomainAdaptor.apply(new Feature("ClosestCue_PhraseFamily", closestCue.getCuePhraseAssertionFamily())));
+              instance.addAll(ffDomainAdaptor.apply(new Feature("ClosestCue_PhraseCategory",
closestCue.getCuePhraseCategory())));
+          }
+          
         }
       }
 //      if (cuePhraseFeatures != null && !cuePhraseFeatures.isEmpty())
@@ -365,18 +437,25 @@ public abstract class AssertionCleartkAn
           // 7/9/13 srh modified per tmiller so it's binary but not numeric feature
           //instance.add(new Feature("ENTITY_TYPE_" + entityOrEventMention.getTypeID()));
           instance.add(new Feature("ENTITY_TYPE_ANAT_SITE"));
-      } /* This hurts recall more than it helps precision
+          // add hack-ey domain adaptation to these hacked-in features
+          if (!fileToDomain.isEmpty() && ffDomainAdaptor!=null) {
+        	  instance.addAll(ffDomainAdaptor.apply(new Feature("ENTITY_TYPE_ANAT_SITE")));
+          }
+      }
+      /* This hurts recall more than it helps precision
       else if (eemTypeId == CONST.NE_TYPE_ID_DRUG) {
     	  // 7/10 adding drug
     	  instance.add(new Feature("ENTITY_TYPE_DRUG"));
       }
       */
       
-      for (SimpleFeatureExtractor extractor : this.entityFeatureExtractors) {
-        instance.addAll(extractor.extract(jCas, entityOrEventMention));
+      // only extract these features if not doing domain adaptation
+      if (ffDomainAdaptor==null) {
+    	  for (SimpleFeatureExtractor extractor : this.entityFeatureExtractors) {
+    		  instance.addAll(extractor.extract(jCas, entityOrEventMention));
+    	  }
       }
       
-      
       List<Feature> zoneFeatures = extractZoneFeatures(coveringZoneMap, entityOrEventMention);
       if (zoneFeatures != null && !zoneFeatures.isEmpty())
       {
@@ -388,11 +467,19 @@ public abstract class AssertionCleartkAn
       
       for(Feature feat : feats){
     	  if(feat.getName() != null && (feat.getName().startsWith("TreeFrag") || feat.getName().startsWith("WORD")
|| feat.getName().startsWith("NEG"))) continue;
+    	  if(feat.getName() != null && (feat.getName().contains("_TreeFrag") || feat.getName().contains("_WORD")
|| feat.getName().contains("_NEG"))) continue;
     	  if(feat.getValue() instanceof String){
     		  feat.setValue(((String)feat.getValue()).toLowerCase());
     	  }
       }
 
+      if (!fileToDomain.isEmpty() && ffDomainAdaptor!=null) {
+    	  for (FeatureFunctionExtractor extractor : this.featureFunctionExtractors) {
+    		  // TODO: extend to the case where the extractors take a different argument besides
entityOrEventMention
+    		  instance.addAll(extractor.extract(jCas, entityOrEventMention));
+    	  }
+      }
+      
       // grab the output label
       setClassLabel(entityOrEventMention, instance);
 
@@ -445,7 +532,31 @@ public abstract class AssertionCleartkAn
 	    return desc;
 	  }
 
+public Map<String, String> getTrainFileToDomain() { // hands out the internal fileToDomain map itself, not a copy
+	return fileToDomain;
+}
 
+public void setTrainFileToDomain(Map<String, String> trainFileToDomain) { // replaces the whole file -> domain map
+	this.fileToDomain = trainFileToDomain; // NOTE(review): ffDomainAdaptor is created in initialize() only and is not rebuilt here, yet process() calls it whenever this map is non-empty -- confirm
+}
+
+/** Picks a domain (corpus) name out of a directory path: the last "/"-separated component that does not start with "test", "train" or "dev" (case-insensitive).
+ * @param dir directory path; it is split on "/" only
+ * @return the last component not starting with test/train/dev, or {@code dir} unchanged when every component is skipped
+ */
+public static String normalizeToDomain(String dir) {
+	  // TODO: real normalization
+	  String[] p = dir.split("/");
+	  List<String> parts = Arrays.asList(p); // NOTE(review): this file imports Arrays from scala.actors.threadpool, not java.util -- looks like an accidental auto-import, confirm
+	  Collections.reverse(parts); // iterate from the end of the path towards its root
+	  for (String part : parts) {
+		  if ( part.toLowerCase().startsWith("test") || part.toLowerCase().startsWith("train")
|| part.toLowerCase().startsWith("dev") ) {
+			  continue;
+		  }
+		  return part; // first component (from the end) that is not a test/train/dev directory
+	  }
+	  return dir; // nothing qualified: fall back to the input string
+}
   
   /*
   public static AnalysisEngineDescription getClassifierDescription(String modelFileName)

Added: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/PolarityFedaCleartkAnalysisEngine.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/PolarityFedaCleartkAnalysisEngine.java?rev=1548577&view=auto
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/PolarityFedaCleartkAnalysisEngine.java
(added)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/PolarityFedaCleartkAnalysisEngine.java
Fri Dec  6 16:14:24 2013
@@ -0,0 +1,140 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.assertion.medfacts.cleartk;
+
+import java.io.File;
+import java.net.URI;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.ctakes.assertion.attributes.features.selection.Chi2FeatureSelection;
+import org.apache.ctakes.assertion.attributes.features.selection.FeatureSelection;
+import org.apache.ctakes.assertion.medfacts.cleartk.extractors.AboveLeftFragmentExtractor;
+import org.apache.ctakes.assertion.medfacts.cleartk.extractors.AboveRightFragmentExtractor;
+import org.apache.ctakes.assertion.medfacts.cleartk.extractors.ContextWordWindowExtractor;
+import org.apache.ctakes.assertion.medfacts.cleartk.extractors.ExtractorListFeatureFunctionConverter;
+import org.apache.ctakes.assertion.medfacts.cleartk.extractors.FedaFeatureFunction;
+import org.apache.ctakes.assertion.medfacts.cleartk.extractors.NegationDependencyFeatureExtractor;
+import org.apache.ctakes.typesystem.type.constants.CONST;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.cleartk.classifier.Instance;
+import org.cleartk.classifier.feature.extractor.CleartkExtractor;
+import org.cleartk.classifier.feature.extractor.simple.SimpleFeatureExtractor;
+import org.cleartk.classifier.feature.function.FeatureFunctionExtractor;
+import org.uimafit.descriptor.ConfigurationParameter;
+
+
+public class PolarityFedaCleartkAnalysisEngine extends PolarityCleartkAnalysisEngine { // polarity annotator whose extractors are additionally wrapped with the inherited ffDomainAdaptor feature function
+
+	public static final String NEGATED = "NEGATED"; // outcome label used when the mention's polarity is NE_POLARITY_NEGATION_PRESENT
+	public static final String NOT_NEGATED = "NOT_NEGATED"; // outcome label for every other polarity value
+
+	@Override
+	public void initialize(UimaContext context) throws ResourceInitializationException {
+		super.initialize(context);
+		probabilityOfKeepingADefaultExample = 1.0; //0.1; -- 1.0 means no downsampling of NOT_NEGATED examples (see setClassLabel)
+
+		if(this.entityFeatureExtractors == null){
+			this.entityFeatureExtractors = new ArrayList<SimpleFeatureExtractor>();
+		}
+		this.entityFeatureExtractors.add(new NegationDependencyFeatureExtractor());
+		this.entityFeatureExtractors.add(new ContextWordWindowExtractor("org/apache/ctakes/assertion/models/polarity.txt"));
+		this.entityFeatureExtractors.add(new AboveLeftFragmentExtractor("AL_Polarity","org/apache/ctakes/assertion/models/sharpPolarityFrags.txt"));
+		//		this.entityFeatureExtractors.add(new AboveRightFragmentExtractor("AR_Polarity","org/apache/ctakes/assertion/models/sharpArPolarityFrags.txt"));
+
+		initializeDomainAdaptation(); // must run after the extractors above are added, since it wraps entityFeatureExtractors
+	
+		initializeFeatureSelection();
+		
+	}
+
+	@Override
+	public void setClassLabel(IdentifiedAnnotation entityOrEventMention, Instance<String>
instance) throws AnalysisEngineProcessException { // training: label the instance from the mention's polarity; otherwise: classify the instance and write the polarity onto the mention
+	      if (this.isTraining())
+	      {
+	        String polarity = (entityOrEventMention.getPolarity() == CONST.NE_POLARITY_NEGATION_PRESENT)
? NEGATED : NOT_NEGATED; // "negated" : "present";
+	        this.lastLabel = polarity;
+	        // downsampling. initialize probabilityOfKeepingADefaultExample to 1.0 for no downsampling
+	        if (NEGATED.equals(polarity))
+	        {
+	          logger.debug("TRAINING: " + polarity);
+	        }
+	        if (NOT_NEGATED.equals(polarity) 
+	        		&& coin.nextDouble() >= this.probabilityOfKeepingADefaultExample) {
+	        	return; // downsampled: the instance keeps no outcome
+	        }
+	        instance.setOutcome(polarity);
+//	        this.dataWriter.write(instance);
+	      } else
+	      {
+	        String label = this.classifier.classify(instance.getFeatures());
+	        this.lastLabel = label;
+	        int polarity = CONST.NE_POLARITY_NEGATION_ABSENT; // also the result for any label that is neither NEGATED nor NOT_NEGATED
+	        if (NOT_NEGATED.equals(label))
+	        {
+	          polarity = CONST.NE_POLARITY_NEGATION_ABSENT;
+	        } else if (NEGATED.equals(label))
+	        {
+	          polarity = CONST.NE_POLARITY_NEGATION_PRESENT;
+            logger.debug(String.format("DECODING/EVAL: %s//%s [%d-%d] (%s)", label, polarity,
entityOrEventMention.getBegin(), entityOrEventMention.getEnd(), entityOrEventMention.getClass().getName()));
+	        }
+	        entityOrEventMention.setPolarity(polarity);
+	      }
+	}
+	public static FeatureSelection<String> createFeatureSelection(double threshold) { // builds a Chi2FeatureSelection named FEATURE_SELECTION_NAME with the given threshold
+		return new Chi2FeatureSelection<String>(AssertionCleartkAnalysisEngine.FEATURE_SELECTION_NAME,
threshold, false);
+		//		  return new MutualInformationFeatureSelection<String>(AssertionCleartkAnalysisEngine.FEATURE_SELECTION_NAME);
+	}
+
+	public static URI createFeatureSelectionURI(File outputDirectoryName) { // URI of <outputDirectoryName>/<FEATURE_SELECTION_NAME>_Chi2_extractor.dat
+		return new File(outputDirectoryName, FEATURE_SELECTION_NAME + "_Chi2_extractor.dat").toURI();
+	}
+
+	private void initializeDomainAdaptation() { // fills featureFunctionExtractors with wrappers around every inherited extractor list plus cuePhraseInWindowExtractor
+		// Do domain adaptation
+		featureFunctionExtractors = new ArrayList<FeatureFunctionExtractor>(); // NOTE(review): ffDomainAdaptor is only assigned in AssertionCleartkAnalysisEngine.initialize() when the file -> domain map is non-empty, so it may be null here -- confirm
+		//			FedaFeatureFunction ff = new FedaFeatureFunction(new ArrayList<String>(trainFileToDomain.values()));
+		featureFunctionExtractors.addAll(ExtractorListFeatureFunctionConverter.convert(contextFeatureExtractors,
ffDomainAdaptor));
+		featureFunctionExtractors.addAll(ExtractorListFeatureFunctionConverter.convert(tokenContextFeatureExtractors,
ffDomainAdaptor));
+		featureFunctionExtractors.addAll(ExtractorListFeatureFunctionConverter.convert(tokenCleartkExtractors,
ffDomainAdaptor));
+		featureFunctionExtractors.addAll(ExtractorListFeatureFunctionConverter.convert(entityFeatureExtractors,
ffDomainAdaptor));
+		featureFunctionExtractors.add(new FeatureFunctionExtractor(cuePhraseInWindowExtractor,
ffDomainAdaptor));
+	}
+	@Override
+	protected void initializeFeatureSelection() throws ResourceInitializationException {
+	    if (featureSelectionThreshold == 0) { // a threshold of 0 turns feature selection off
+	    	this.featureSelection = null;
+	    } else {
+	    	this.featureSelection = this.createFeatureSelection(this.featureSelectionThreshold); // static factory called through this; loading from featureSelectionURI is commented out below
+
+//	    	if ( (new File(this.featureSelectionURI)).exists() ) {
+//	    		try {
+//	    			this.featureSelection.load(this.featureSelectionURI);
+//	    		} catch (IOException e) {
+//	    			throw new ResourceInitializationException(e);
+//	    		}
+//	    	}
+	    }		
+	}
+	  
+}

Propchange: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/PolarityFedaCleartkAnalysisEngine.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/extractors/ExtractorListFeatureFunctionConverter.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/extractors/ExtractorListFeatureFunctionConverter.java?rev=1548577&view=auto
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/extractors/ExtractorListFeatureFunctionConverter.java
(added)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/extractors/ExtractorListFeatureFunctionConverter.java
Fri Dec  6 16:14:24 2013
@@ -0,0 +1,26 @@
+package org.apache.ctakes.assertion.medfacts.cleartk.extractors;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.cleartk.classifier.feature.extractor.CleartkExtractor;
+import org.cleartk.classifier.feature.extractor.simple.SimpleFeatureExtractor;
+import org.cleartk.classifier.feature.function.FeatureFunction;
+import org.cleartk.classifier.feature.function.FeatureFunctionExtractor;
+
+public class ExtractorListFeatureFunctionConverter { // static helper: wraps each extractor of a list in a FeatureFunctionExtractor
+	public static List<FeatureFunctionExtractor> convert( List<? extends SimpleFeatureExtractor>
extractors, FeatureFunction ff ) { // returns a new list, one wrapper per input extractor, in input order
+
+		List<FeatureFunctionExtractor> featureFunctionExtractors = new ArrayList<FeatureFunctionExtractor>();
+		if (null!=extractors) { // a null list yields an empty result instead of an exception
+			for (SimpleFeatureExtractor extractor : extractors) {
+				featureFunctionExtractors.add(
+						new FeatureFunctionExtractor(extractor,ff) // every wrapper is given the same ff instance
+						);
+			}
+		}
+		
+		return featureFunctionExtractors;
+	}
+
+}

Propchange: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/extractors/ExtractorListFeatureFunctionConverter.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/extractors/FedaFeatureFunction.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/extractors/FedaFeatureFunction.java?rev=1548577&view=auto
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/extractors/FedaFeatureFunction.java
(added)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/extractors/FedaFeatureFunction.java
Fri Dec  6 16:14:24 2013
@@ -0,0 +1,48 @@
+package org.apache.ctakes.assertion.medfacts.cleartk.extractors;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+import org.cleartk.classifier.Feature;
+import org.cleartk.classifier.feature.function.FeatureFunction;
+
+public class FedaFeatureFunction implements FeatureFunction { // feature function that adds a copy of each feature under a name built from the current domain
+
+	  public static final String DOMAIN_ADAPTATION_ALGORITHM = "FEDA"; // name component placed between the domain and the original feature name
+	  List<String> domainIds; // stored by the constructor; only the commented-out loop in apply() refers to it
+	  String currentDomain; // domain applied by apply(); stays null until setDomain() is called
+	  
+	  public FedaFeatureFunction ( List<String> domains ) { // keeps a reference to the given list (no copy)
+		  domainIds = domains;
+	  }
+	  
+	  /**
+	   * @return a list holding the original feature (the "general" domain copy) and, if a current domain is set, one more feature named from (currentDomain, "FEDA", original name) whose value is the original value's toString()
+	   */
+	  @Override
+	  public List<Feature> apply(Feature feature) {
+	    Object featureValue = feature.getValue();
+	    
+	    List<Feature> fedaFeatures = new ArrayList<Feature>();  
+	    fedaFeatures.add(feature); // the untouched original is always returned first
+	    if (null==currentDomain) { return fedaFeatures; } // no domain set: only the original feature
+	    
+//	    for (String domain : domainIds) {
+//		    String featureName = Feature.createName(domain, DOMAIN_ADAPTATION_ALGORITHM, feature.getName());
+	    String featureName = Feature.createName(currentDomain, DOMAIN_ADAPTATION_ALGORITHM,
feature.getName());
+	    
+	    fedaFeatures.add(
+	    		new Feature(
+	    				featureName,
+	    				featureValue.toString() ) // NOTE(review): throws NullPointerException when the feature value is null and a domain is set; the copy's value is always a String
+	    		);
+//	    }
+	    return fedaFeatures;
+	  }
+
+	  public void setDomain(String domain) { // null is accepted and makes apply() return only the original feature
+		  currentDomain = domain;
+	  }
+
+}

Propchange: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/extractors/FedaFeatureFunction.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/PolarityDomainAdaptationTests.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/PolarityDomainAdaptationTests.java?rev=1548577&view=auto
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/PolarityDomainAdaptationTests.java
(added)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/PolarityDomainAdaptationTests.java
Fri Dec  6 16:14:24 2013
@@ -0,0 +1,128 @@
+package org.apache.ctakes.assertion.train;
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.Date;
+
+import org.apache.ctakes.assertion.eval.AssertionEvaluation;
+import org.apache.ctakes.assertion.medfacts.cleartk.AssertionCleartkAnalysisEngine;
+import org.apache.ctakes.assertion.util.AssertionConst;
+
+import scala.actors.threadpool.Arrays;
+
+
+/**
+ * Runs the polarity evaluation for every (model, test directory) pair listed in main(),
+ * by calling AssertionEvaluation.main with --test-only and --feda for each pair.
+ * Note that this uses constants within {@link AssertionConst} for the directory names.
+ */
+public class PolarityDomainAdaptationTests {
+
+	final static String RUN_ID = "feda_"; // prefix of the evaluation log file name built in main()
+	
+	protected final static String SHARP_TEST = AssertionConst.DATA_DIR + "preprocessed_data/sharp/test";
+	protected final static String I2B2_TEST  = AssertionConst.DATA_DIR + "preprocessed_data/i2b2/test";
+	protected final static String MIPACQ_TEST = AssertionConst.DATA_DIR + "preprocessed_data/mipacq/test";
+	protected final static String NEGEX_TEST = AssertionConst.DATA_DIR + "preprocessed_data/negex";
+	
+	public static void main(String[] args) throws Exception { // args is not read; the whole grid below is hard-coded
+
+		AssertionEvaluation.useEvaluationLogFile = true;
+		AssertionEvaluation.evaluationLogFilePath = "eval/"+RUN_ID+new Date().toString().replaceAll("
","_") + ".txt";
+
+		ArrayList<TestPair> testGrid = new ArrayList<TestPair>(); // each entry: (model directory, test data directory)
+		testGrid.add(new TestPair(PolarityDomainAdaptationTrain.SHARP_FEDA, 	SHARP_TEST));
+		testGrid.add(new TestPair(PolarityDomainAdaptationTrain.SHARP_FEDA, 	I2B2_TEST));
+		testGrid.add(new TestPair(PolarityDomainAdaptationTrain.SHARP_FEDA, 	MIPACQ_TEST));
+		testGrid.add(new TestPair(PolarityDomainAdaptationTrain.SHARP_FEDA, 	NEGEX_TEST));
+		testGrid.add(new TestPair(PolarityDomainAdaptationTrain.I2B2_FEDA,  	SHARP_TEST));
+		testGrid.add(new TestPair(PolarityDomainAdaptationTrain.I2B2_FEDA,  	I2B2_TEST));
+		testGrid.add(new TestPair(PolarityDomainAdaptationTrain.I2B2_FEDA,  	MIPACQ_TEST));
+		testGrid.add(new TestPair(PolarityDomainAdaptationTrain.I2B2_FEDA,  	NEGEX_TEST));
+		testGrid.add(new TestPair(PolarityDomainAdaptationTrain.MIPACQ_FEDA,  SHARP_TEST));
+		testGrid.add(new TestPair(PolarityDomainAdaptationTrain.MIPACQ_FEDA,  I2B2_TEST));
+		testGrid.add(new TestPair(PolarityDomainAdaptationTrain.MIPACQ_FEDA,  MIPACQ_TEST));
+		testGrid.add(new TestPair(PolarityDomainAdaptationTrain.MIPACQ_FEDA,  NEGEX_TEST));
+		testGrid.add(new TestPair(PolarityDomainAdaptationTrain.NEGEX_FEDA,  	SHARP_TEST));
+		testGrid.add(new TestPair(PolarityDomainAdaptationTrain.NEGEX_FEDA,  	I2B2_TEST));
+		testGrid.add(new TestPair(PolarityDomainAdaptationTrain.NEGEX_FEDA,  	MIPACQ_TEST));
+		testGrid.add(new TestPair(PolarityDomainAdaptationTrain.NEGEX_FEDA,  	NEGEX_TEST));  //
not valid
+		testGrid.add(new TestPair(PolarityDomainAdaptationTrain.SHARP_I2B2_FEDA,  	SHARP_TEST));
+		testGrid.add(new TestPair(PolarityDomainAdaptationTrain.SHARP_I2B2_FEDA,  	I2B2_TEST));
+		testGrid.add(new TestPair(PolarityDomainAdaptationTrain.SHARP_I2B2_FEDA,  	MIPACQ_TEST));
// not meaningful
+		testGrid.add(new TestPair(PolarityDomainAdaptationTrain.SHARP_I2B2_FEDA,  	NEGEX_TEST));
 // not meaningful
+		testGrid.add(new TestPair(PolarityDomainAdaptationTrain.SHARP_MIPACQ_FEDA,  SHARP_TEST));
+		testGrid.add(new TestPair(PolarityDomainAdaptationTrain.SHARP_MIPACQ_FEDA,  I2B2_TEST));
   // not meaningful
+		testGrid.add(new TestPair(PolarityDomainAdaptationTrain.SHARP_MIPACQ_FEDA,  MIPACQ_TEST));
+		testGrid.add(new TestPair(PolarityDomainAdaptationTrain.SHARP_MIPACQ_FEDA,  NEGEX_TEST));
+		testGrid.add(new TestPair(PolarityDomainAdaptationTrain.SHARP_NEGEX_FEDA,  	SHARP_TEST));
// not meaningful
+		testGrid.add(new TestPair(PolarityDomainAdaptationTrain.SHARP_NEGEX_FEDA,  	I2B2_TEST));
 //not meaningful
+		testGrid.add(new TestPair(PolarityDomainAdaptationTrain.SHARP_NEGEX_FEDA,  	MIPACQ_TEST));
// not meaningful
+		testGrid.add(new TestPair(PolarityDomainAdaptationTrain.SHARP_NEGEX_FEDA,  	NEGEX_TEST));
 // not valid
+		testGrid.add(new TestPair(PolarityDomainAdaptationTrain.I2B2_MIPACQ_NEGEX_FEDA,  	SHARP_TEST));
+		testGrid.add(new TestPair(PolarityDomainAdaptationTrain.SHARP_MIPACQ_NEGEX_FEDA,  	I2B2_TEST));
+		testGrid.add(new TestPair(PolarityDomainAdaptationTrain.SHARP_I2B2_NEGEX_FEDA,  		MIPACQ_TEST));
+		testGrid.add(new TestPair(PolarityDomainAdaptationTrain.SHARP_I2B2_MIPACQ_FEDA,  	SHARP_TEST));
+		testGrid.add(new TestPair(PolarityDomainAdaptationTrain.SHARP_I2B2_MIPACQ_FEDA,  	I2B2_TEST));
+		testGrid.add(new TestPair(PolarityDomainAdaptationTrain.SHARP_I2B2_MIPACQ_FEDA,  	MIPACQ_TEST));
+		testGrid.add(new TestPair(PolarityDomainAdaptationTrain.SHARP_I2B2_MIPACQ_FEDA,  	NEGEX_TEST));
+		testGrid.add(new TestPair(PolarityDomainAdaptationTrain.SHARP_I2B2_MIPACQ_NEGEX_FEDA, 
	SHARP_TEST));
+		testGrid.add(new TestPair(PolarityDomainAdaptationTrain.SHARP_I2B2_MIPACQ_NEGEX_FEDA, 
	I2B2_TEST));
+		testGrid.add(new TestPair(PolarityDomainAdaptationTrain.SHARP_I2B2_MIPACQ_NEGEX_FEDA, 
	MIPACQ_TEST));
+		testGrid.add(new TestPair(PolarityDomainAdaptationTrain.SHARP_I2B2_MIPACQ_NEGEX_FEDA, 
	NEGEX_TEST)); //not valid
+
+		
+		String attribute = "polarity"; // the only attribute evaluated; every other one gets an --ignore-* flag below
+
+		for (TestPair oneTest : testGrid) {
+			ArrayList<String> params = new ArrayList<String>();
+
+			File instancef = new File("eval/instances_"+
+					oneTest.model.substring(oneTest.model.lastIndexOf("/")+1)+"_"+
+					AssertionCleartkAnalysisEngine.normalizeToDomain(oneTest.data)); // file name: instances_<last segment of model path>_<domain of test dir>
+			
+			params.add("--test-dir"); 	params.add(oneTest.data);
+			params.add("--models-dir"); params.add(oneTest.model);
+			String trainDomains = PolarityDomainAdaptationTrain.trainGrid.inverse().get(oneTest.model); // trainGrid is declared in PolarityDomainAdaptationTrain; presumably maps train dirs to model dir -- confirm
+			if (null == trainDomains) { continue; } // pairs whose model has no trainGrid entry are skipped without any message
+			params.add("--train-dir"); 	params.add(trainDomains); // must list the train-dir in order
to establish which domains
+			//			params.add("--ytex-negation");
+			//		params.add("--evaluation-output-dir");	params.add(AssertionConst.evalOutputDir);
+			params.add("--test-only");	
+			params.add("--feda");
+			params.add("--print-instances");
+			// hack-y way to name this
+			params.add(instancef.getAbsolutePath()); // value of the preceding --print-instances option
+
+			// Build up an "ignore" string
+			for (String ignoreAttribute : AssertionConst.allAnnotationTypes) {
+				if (!ignoreAttribute.equals(attribute)) { 
+
+					if (ignoreAttribute.equals("historyOf")) {
+						ignoreAttribute = ignoreAttribute.substring(0, ignoreAttribute.length()-2); // "historyOf" -> "history", so the flag becomes --ignore-history
+					}
+
+					params.add("--ignore-" + ignoreAttribute);
+				}
+			}
+			String[] paramList = params.toArray(new String[]{});
+
+			System.out.println(Arrays.asList(paramList).toString()); // NOTE(review): Arrays is imported from scala.actors.threadpool in this file, not java.util -- confirm that is intended
+
+			// Run the actual assertion test on just one attribute
+			AssertionEvaluation.main( paramList );
+		}
+	}
+
+
+	static class TestPair { // one grid cell: model is passed as --models-dir, data as --test-dir
+		String model;
+		String data;
+		TestPair (String a, String b) {
+			model=a;
+			data=b;
+		}
+	}
+	
+
+}

Propchange: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/PolarityDomainAdaptationTests.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/PolarityDomainAdaptationTrain.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/PolarityDomainAdaptationTrain.java?rev=1548577&view=auto
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/PolarityDomainAdaptationTrain.java
(added)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/PolarityDomainAdaptationTrain.java
Fri Dec  6 16:14:24 2013
@@ -0,0 +1,88 @@
+package org.apache.ctakes.assertion.train;
+
+import java.util.ArrayList;
+import java.util.Map.Entry;
+
+import org.apache.ctakes.assertion.eval.AssertionEvaluation;
+import org.apache.ctakes.assertion.util.AssertionConst;
+
+import com.google.common.collect.BiMap;
+import com.google.common.collect.HashBiMap;
+import com.google.common.collect.ImmutableBiMap;
+/**
+ * For each assertion attribute (polarity, conditional, etc), train a model using the data
+ * in the training directories for that attribute, and store the model under the models-dir
+ * Note that this uses constants within {@link AssertionConst} for the directory names.
+ */
+public class PolarityDomainAdaptationTrain {
+	protected final static String SHARP_TRAIN = AssertionConst.DATA_DIR + "preprocessed_data/sharp/train";
+	protected final static String I2B2_TRAIN  = AssertionConst.DATA_DIR + "preprocessed_data/i2b2/train";
+	protected final static String MIPACQ_TRAIN = AssertionConst.DATA_DIR + "preprocessed_data/mipacq/train";
+	protected final static String NEGEX_TRAIN = AssertionConst.DATA_DIR + "preprocessed_data/negex"; // actually test
+
+	public final static String SHARP_FEDA = "../ctakes-assertion-res/resources/model/sharptrain-feda";
+	protected final static String I2B2_FEDA  = "../ctakes-assertion-res/resources/model/i2b2train-feda";
+	protected final static String MIPACQ_FEDA  = "../ctakes-assertion-res/resources/model/mipacqtrain-feda";
+	protected final static String NEGEX_FEDA  = "../ctakes-assertion-res/resources/model/negextest-feda";
+	protected final static String SHARP_I2B2_FEDA = "../ctakes-assertion-res/resources/model/sharptrain+i2b2train-feda";
+	protected final static String SHARP_MIPACQ_FEDA = "../ctakes-assertion-res/resources/model/sharptrain+mipacqtrain-feda";
+	protected final static String SHARP_NEGEX_FEDA = "../ctakes-assertion-res/resources/model/sharptrain+negextest-feda";
+	protected final static String I2B2_MIPACQ_NEGEX_FEDA = "../ctakes-assertion-res/resources/model/i2b2train+mipacqtrain+negextest-feda";
+	protected final static String SHARP_I2B2_MIPACQ_FEDA = "../ctakes-assertion-res/resources/model/sharptrain+i2b2train+mipacqtrain-feda";
+	protected final static String SHARP_MIPACQ_NEGEX_FEDA = "../ctakes-assertion-res/resources/model/sharptrain+mipacqtrain+negextest-feda";
+	protected final static String SHARP_I2B2_NEGEX_FEDA = "../ctakes-assertion-res/resources/model/sharptrain+i2b2train+negextest-feda";
+	protected final static String SHARP_I2B2_MIPACQ_NEGEX_FEDA = "../ctakes-assertion-res/resources/model/sharpi2b2mipacqnegex-feda";
+
+	public static BiMap<String,String> trainGrid = HashBiMap.create();
+	static {
+		trainGrid.put(SHARP_TRAIN, 	SHARP_FEDA);
+		trainGrid.put(I2B2_TRAIN, 	I2B2_FEDA);
+		trainGrid.put(MIPACQ_TRAIN,	MIPACQ_FEDA);
+		trainGrid.put(NEGEX_TRAIN,	NEGEX_FEDA);
+		trainGrid.put(SHARP_TRAIN+":"+I2B2_TRAIN,	SHARP_I2B2_FEDA);
+		trainGrid.put(SHARP_TRAIN+":"+MIPACQ_TRAIN,	SHARP_MIPACQ_FEDA);
+		trainGrid.put(SHARP_TRAIN+":"+NEGEX_TRAIN,	SHARP_NEGEX_FEDA);
+		trainGrid.put(I2B2_TRAIN+":"+MIPACQ_TRAIN+":"+NEGEX_TRAIN,	I2B2_MIPACQ_NEGEX_FEDA);
+		trainGrid.put(SHARP_TRAIN+":"+I2B2_TRAIN+":"+MIPACQ_TRAIN,	SHARP_I2B2_MIPACQ_FEDA);
+		trainGrid.put(SHARP_TRAIN+":"+MIPACQ_TRAIN+":"+NEGEX_TRAIN,	SHARP_MIPACQ_NEGEX_FEDA);
+		trainGrid.put(SHARP_TRAIN+":"+I2B2_TRAIN+":"+NEGEX_TRAIN,	SHARP_I2B2_NEGEX_FEDA);
+		trainGrid.put(SHARP_TRAIN+":"+I2B2_TRAIN+":"+MIPACQ_TRAIN+":"+NEGEX_TRAIN,	
+				SHARP_I2B2_MIPACQ_NEGEX_FEDA);
+	}
+	
+	public static void main(String[] args) throws Exception {
+
+		String attribute = "polarity";
+		
+
+		for (Entry<String, String> oneTrain : trainGrid.entrySet()) {
+			
+			ArrayList<String> params = new ArrayList<String>();
+
+			params.add("--train-dir"); 	params.add(oneTrain.getKey());
+			params.add("--models-dir"); params.add(oneTrain.getValue());
+			params.add("--train-only"); 
+			params.add("--feature-selection");	params.add(Float.toString(0.000000000001f));
+			params.add("--feda");
+
+			// Build up an "ignore" string
+			for (String ignoreAttribute : AssertionConst.allAnnotationTypes) {
+				if (!ignoreAttribute.equals(attribute)) { 
+
+					if (ignoreAttribute.equals("historyOf")) {
+						ignoreAttribute = ignoreAttribute.substring(0, ignoreAttribute.length()-2);
+					}
+
+					params.add("--ignore-" + ignoreAttribute);
+				}
+			}
+			String[] paramList = params.toArray(new String[]{});
+			
+			// Run the actual assertion training on just one attribute
+			AssertionEvaluation.main( paramList );
+		}
+		
+		
+		
+	}
+}

Propchange: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/PolarityDomainAdaptationTrain.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain



Mime
View raw message