ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From shalg...@apache.org
Subject svn commit: r1459553 - in /incubator/ctakes/trunk: ctakes-assertion/resources/launch/ ctakes-assertion/src/main/java/org/apache/ctakes/assertion/attributes/features/ ctakes-assertion/src/main/java/org/apache/ctakes/assertion/attributes/history/ ctakes-...
Date Thu, 21 Mar 2013 22:18:54 GMT
Author: shalgrim
Date: Thu Mar 21 22:18:53 2013
New Revision: 1459553

URL: http://svn.apache.org/r1459553
Log:
Modified the pre-processing code to work with the latest change to SHARPKnowtatorXMLReader
where the member name got changed. Also modified to work with Windows paths

Added:
    incubator/ctakes/trunk/ctakes-assertion/resources/launch/ctakes__assertion_eval_preprocess.launch
  (with props)
    incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/attributes/features/HistoryFeaturesExtractor.java
  (with props)
    incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/attributes/history/
    incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/attributes/history/HistoryAttributeClassifier.java
  (with props)
    incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/HistoryCleartkAnalysisEngine.java
  (with props)
Modified:
    incubator/ctakes/trunk/ctakes-assertion/resources/launch/ctakes__assertion_eval.launch
    incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java
    incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/pipelines/GoldEntityAndAttributeReaderPipelineForSeedCorpus.java
    incubator/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/SHARPKnowtatorXMLReader.java

Modified: incubator/ctakes/trunk/ctakes-assertion/resources/launch/ctakes__assertion_eval.launch
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-assertion/resources/launch/ctakes__assertion_eval.launch?rev=1459553&r1=1459552&r2=1459553&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-assertion/resources/launch/ctakes__assertion_eval.launch (original)
+++ incubator/ctakes/trunk/ctakes-assertion/resources/launch/ctakes__assertion_eval.launch Thu Mar 21 22:18:53 2013
@@ -10,7 +10,7 @@
 <stringAttribute key="org.eclipse.debug.ui.ATTR_CAPTURE_IN_FILE" value="/tmp/assertion.log"/>
 <stringAttribute key="org.eclipse.jdt.launching.CLASSPATH_PROVIDER" value="org.eclipse.m2e.launchconfig.classpathProvider"/>
 <stringAttribute key="org.eclipse.jdt.launching.MAIN_TYPE" value="org.apache.ctakes.assertion.eval.AssertionEvaluation"/>
-<stringAttribute key="org.eclipse.jdt.launching.PROGRAM_ARGUMENTS" value="--train-dir /sharp_data/train --test-dir /sharp_data/test --models-dir /sharp_data/model/eval.model"/>
+<stringAttribute key="org.eclipse.jdt.launching.PROGRAM_ARGUMENTS" value="--train-dir data/train --test-dir data/test --models-dir data/model/eval.model --dev-dir data/dev"/>
 <stringAttribute key="org.eclipse.jdt.launching.PROJECT_ATTR" value="ctakes-assertion"/>
 <stringAttribute key="org.eclipse.jdt.launching.SOURCE_PATH_PROVIDER" value="org.eclipse.m2e.launchconfig.sourcepathProvider"/>
 </launchConfiguration>

Added: incubator/ctakes/trunk/ctakes-assertion/resources/launch/ctakes__assertion_eval_preprocess.launch
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-assertion/resources/launch/ctakes__assertion_eval_preprocess.launch?rev=1459553&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-assertion/resources/launch/ctakes__assertion_eval_preprocess.launch (added)
+++ incubator/ctakes/trunk/ctakes-assertion/resources/launch/ctakes__assertion_eval_preprocess.launch Thu Mar 21 22:18:53 2013
@@ -0,0 +1,16 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<launchConfiguration type="org.eclipse.jdt.launching.localJavaApplication">
+<stringAttribute key="bad_container_name" value="/ctakes-assertion/resour"/>
+<listAttribute key="org.eclipse.debug.core.MAPPED_RESOURCE_PATHS">
+<listEntry value="/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java"/>
+</listAttribute>
+<listAttribute key="org.eclipse.debug.core.MAPPED_RESOURCE_TYPES">
+<listEntry value="1"/>
+</listAttribute>
+<stringAttribute key="org.eclipse.debug.ui.ATTR_CAPTURE_IN_FILE" value="/tmp/assertion.log"/>
+<stringAttribute key="org.eclipse.jdt.launching.CLASSPATH_PROVIDER" value="org.eclipse.m2e.launchconfig.classpathProvider"/>
+<stringAttribute key="org.eclipse.jdt.launching.MAIN_TYPE" value="org.apache.ctakes.assertion.eval.AssertionEvaluation"/>
+<stringAttribute key="org.eclipse.jdt.launching.PROGRAM_ARGUMENTS" value="--train-dir data/train --test-dir data/test --models-dir data/model/eval.model --dev-dir data/dev --preprocess-only &quot;H:\SHARP\Corpora\\Mayo\UMLS_CEM&quot;"/>
+<stringAttribute key="org.eclipse.jdt.launching.PROJECT_ATTR" value="ctakes-assertion"/>
+<stringAttribute key="org.eclipse.jdt.launching.SOURCE_PATH_PROVIDER" value="org.eclipse.m2e.launchconfig.sourcepathProvider"/>
+</launchConfiguration>

Propchange: incubator/ctakes/trunk/ctakes-assertion/resources/launch/ctakes__assertion_eval_preprocess.launch
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/attributes/features/HistoryFeaturesExtractor.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/attributes/features/HistoryFeaturesExtractor.java?rev=1459553&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/attributes/features/HistoryFeaturesExtractor.java (added)
+++ incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/attributes/features/HistoryFeaturesExtractor.java Thu Mar 21 22:18:53 2013
@@ -0,0 +1,80 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.assertion.attributes.features;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+
+import org.apache.ctakes.assertion.attributes.generic.GenericAttributeClassifier;
+import org.apache.ctakes.assertion.attributes.history.HistoryAttributeClassifier;
+import org.apache.ctakes.assertion.attributes.subject.SubjectAttributeClassifier;
+import org.apache.ctakes.dependency.parser.util.DependencyUtility;
+import org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.cleartk.classifier.Feature;
+import org.cleartk.classifier.feature.extractor.simple.SimpleFeatureExtractor;
+
+
+/** HistoryFeaturesExtractor
+ * 		Ports the features and classification decisions of the first version (logic) of the subject tool
+ * 
+ * @author shalgrim
+ *
+ */
+public class HistoryFeaturesExtractor implements SimpleFeatureExtractor {
+	
+	
+	@Override
+	public List<Feature> extract(JCas jCas, Annotation arg) {
+		
+		List<Feature> features = new ArrayList<Feature>();
+		
+		// Pull in general dependency-based features -- externalize to another extractor?
+	    ConllDependencyNode node = DependencyUtility.getNominalHeadNode(jCas, arg);
+	    if (node!= null) {
+	    	features.add(new Feature("DEPENDENCY_HEAD", node));
+		}
+	    
+	    HashMap<String, Boolean> featsMap = HistoryAttributeClassifier.extract(jCas, arg);
+
+	    // Pull in all the features that were used for the rule-based module
+	    features.addAll( hashToFeatureList(featsMap) );
+	    // Pull in the result of the rule-based module as well
+	    features.add(new Feature("HISTORY_CLASSIFIER_LOGIC", HistoryAttributeClassifier.classifyWithLogic(featsMap)));
+
+	    
+	    return features;
+	}
+
+	private Collection<? extends Feature> hashToFeatureList(
+			HashMap<String, Boolean> featsIn) {
+		
+		Collection<Feature> featsOut = new HashSet<Feature>();
+		for (String featName : featsIn.keySet()) {
+			featsOut.add(new Feature(featName,featsIn.get(featName)));
+		}
+		
+		return featsOut;
+	}
+
+}

Propchange: incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/attributes/features/HistoryFeaturesExtractor.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/attributes/history/HistoryAttributeClassifier.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/attributes/history/HistoryAttributeClassifier.java?rev=1459553&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/attributes/history/HistoryAttributeClassifier.java (added)
+++ incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/attributes/history/HistoryAttributeClassifier.java Thu Mar 21 22:18:53 2013
@@ -0,0 +1,190 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.assertion.attributes.history;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.uima.UIMAException;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.uimafit.util.JCasUtil;
+import org.xml.sax.SAXException;
+
+import org.apache.ctakes.dependency.parser.util.DependencyUtility;
+import org.apache.ctakes.typesystem.type.constants.CONST;
+import org.apache.ctakes.typesystem.type.syntax.BaseToken;
+import org.apache.ctakes.typesystem.type.syntax.Chunk;
+import org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode;
+import org.apache.ctakes.typesystem.type.textsem.EntityMention;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.ctakes.typesystem.type.textsem.SemanticArgument;
+import org.apache.ctakes.typesystem.type.textspan.Sentence;
+
+
+/**
+ * @author shalgrim
+ *
+ */
+public class HistoryAttributeClassifier {
+
+	private static final String POSTCOORD_NMOD = "donor_srlarg";
+	private static final String DISCUSSION_DEPPATH = "discussion_deppath";
+	private static final String SUBSUMED_CHUNK = "other_token"; 
+	private static final String SUBSUMED_ANNOT = "other_deppath"; 
+    public static ArrayList<String> FeatureIndex = new ArrayList<String>();
+    static{
+            FeatureIndex.add(POSTCOORD_NMOD);
+            FeatureIndex.add(DISCUSSION_DEPPATH);
+            FeatureIndex.add(SUBSUMED_CHUNK);
+            FeatureIndex.add(SUBSUMED_ANNOT);
+    }
+
+	// currently goes from entityMention to Sentence to SemanticArgument
+	public static Boolean getHIstory(JCas jCas, IdentifiedAnnotation mention) {
+		
+		HashMap<String, Boolean> vfeat = extract(jCas, mention);
+		
+		return classifyWithLogic(vfeat);
+			
+	}
+
+
+	public static Boolean classifyWithLogic(HashMap<String, Boolean> vfeat) {
+		// Logic to identify cases, may be replaced by learned classification
+		int subsumectr = 0;
+		if (vfeat.get(SUBSUMED_CHUNK)) { } //subsumectr++; }
+		if (vfeat.get(SUBSUMED_ANNOT)) { subsumectr++; }
+		if (vfeat.get(POSTCOORD_NMOD)) { subsumectr++; }
+		Boolean subsume_summary = (subsumectr>0);
+		if (vfeat.get(DISCUSSION_DEPPATH) || subsume_summary) {
+			return true;
+		} else {
+			return false;
+		}
+	}
+
+
+	public static HashMap<String, Boolean> extract(JCas jCas,
+			Annotation arg) {
+		HashMap<String,Boolean> vfeat = new HashMap<String,Boolean>();
+		for (String feat : FeatureIndex) {
+			vfeat.put(feat, false);
+		}
+		
+		// find the sentence that entityMention is in
+		Sentence sEntity = null;
+		Collection<Sentence> sentences = JCasUtil.select(jCas, Sentence.class);
+		for (Sentence s : sentences) {
+			if ( s.getBegin()<=arg.getBegin() && s.getEnd()>=arg.getEnd()) {
+				sEntity = s;
+				break;
+			}
+		}
+//		if (sEntity==null)
+//			return null;
+		
+		if (sEntity!=null) {
+			
+
+			// 2) some other identified annotation subsumes this one?
+			List<IdentifiedAnnotation> lsmentions = JCasUtil.selectPreceding(jCas, IdentifiedAnnotation.class, arg, 5);
+			lsmentions.addAll(JCasUtil.selectFollowing(jCas, IdentifiedAnnotation.class, arg, 5));
+			for (IdentifiedAnnotation annot : lsmentions) {
+				if ( annot.getBegin()>arg.getBegin()) {
+					break;
+				} else {
+					if ( annot.getEnd()<arg.getEnd()) {
+						continue;
+					} else if ( !DependencyUtility.equalCoverage(
+							DependencyUtility.getNominalHeadNode(jCas, annot),
+							DependencyUtility.getNominalHeadNode(jCas, arg)) ) {
+						// the case that annot is a superset
+						vfeat.put(SUBSUMED_ANNOT, true);
+					}
+				}
+			}
+			
+			// 3) some chunk subsumes this?
+			List<Chunk> lschunks = JCasUtil.selectPreceding(jCas, Chunk.class, arg, 5);
+			lschunks.addAll(JCasUtil.selectFollowing(jCas, Chunk.class, arg, 5));
+			for (Chunk chunk : lschunks) {
+				if ( chunk.getBegin()>arg.getBegin()) {
+					break;
+				} else {
+					if ( chunk.getEnd()<arg.getEnd()) {
+						continue;
+					} else if ( !DependencyUtility.equalCoverage(
+							DependencyUtility.getNominalHeadNode(jCas, chunk), 
+							DependencyUtility.getNominalHeadNode(jCas, arg)) ) {
+						// the case that annot is a superset
+						vfeat.put(SUBSUMED_CHUNK, true);
+					}
+				}
+			}
+		}
+		
+		
+		List<ConllDependencyNode> depnodes = JCasUtil.selectCovered(jCas, ConllDependencyNode.class, arg);
+		if (!depnodes.isEmpty()) { 
+			ConllDependencyNode depnode = DependencyUtility.getNominalHeadNode(depnodes);
+
+			// 1) check if the head node of the entity mention is really just part of a larger noun phrase
+			if (depnode.getDeprel().matches("(NMOD|amod|nmod|det|predet|nn|poss|possessive|infmod|partmod|rcmod)")) {
+				vfeat.put(POSTCOORD_NMOD, true);
+			}
+
+			// 4) search dependency paths for discussion context
+			for (ConllDependencyNode dn : DependencyUtility.getPathToTop(jCas, depnode)) {
+				if ( isDiscussionContext(dn) ) {
+					vfeat.put(DISCUSSION_DEPPATH, true);
+				}
+			}
+		}
+		return vfeat;
+	}
+	
+	
+	private static boolean isDonorTerm(Annotation arg) {
+		return arg.getCoveredText().toLowerCase()
+		.matches("(donor).*");
+	}
+
+	
+	private static boolean isDiscussionContext(Annotation arg) {
+		return arg.getCoveredText().toLowerCase()
+		.matches("(discuss|ask|understand|understood|tell|told|mention|talk|speak|spoke|address).*");
+	}
+
+
+	// a main method for regex testing
+	public static void main(String[] args) {
+		String s = "steps";
+		if (s.toLowerCase().matches(".*(in-law|stepc|stepd|stepso|stepf|stepm|step-).*")) {
+			System.out.println("match");
+		} else {
+			System.out.println("no match");
+		}
+	}
+}

Propchange: incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/attributes/history/HistoryAttributeClassifier.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Modified: incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java?rev=1459553&r1=1459552&r2=1459553&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java (original)
+++ incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java Thu Mar 21 22:18:53 2013
@@ -69,6 +69,7 @@ import org.apache.ctakes.assertion.medfa
 import org.apache.ctakes.assertion.medfacts.cleartk.AssertionComponents;
 import org.apache.ctakes.assertion.medfacts.cleartk.ConditionalCleartkAnalysisEngine;
 import org.apache.ctakes.assertion.medfacts.cleartk.GenericCleartkAnalysisEngine;
+import org.apache.ctakes.assertion.medfacts.cleartk.HistoryCleartkAnalysisEngine;
 import org.apache.ctakes.assertion.medfacts.cleartk.PolarityCleartkAnalysisEngine;
 import org.apache.ctakes.assertion.medfacts.cleartk.SubjectCleartkAnalysisEngine;
 import org.apache.ctakes.assertion.medfacts.cleartk.UncertaintyCleartkAnalysisEngine;
@@ -176,6 +177,13 @@ public class AssertionEvaluation extends
             required = false)
     public boolean ignoreGeneric = false;
         
+    // srh adding 2/20/13
+    @Option(
+            name = "--ignore-history",
+            usage = "specify whether 'history of' processing should be run (true or false). default: false",
+            required = false)
+    public boolean ignoreHistory = false;
+        
     @Option(
             name = "--cross-validation",
             usage = "ignore the test set and run n-fold cross-validation. default: n=2",
@@ -337,6 +345,7 @@ protected static Options options = new O
 	    "ignore uncertainty: %b%n" +
 	    "ignore subject: %b%n" +
 	    "ignore generic: %b%n" +
+	    "ignore history: %b%n" +
 	    "%n%n",
 	    options.trainDirectory.getAbsolutePath(),
 	    (options.testDirectory != null) ? options.testDirectory.getAbsolutePath() : "",
@@ -346,7 +355,8 @@ protected static Options options = new O
 	    options.ignoreConditional,
 	    options.ignoreUncertainty,
 	    options.ignoreSubject,
-	    options.ignoreGeneric
+	    options.ignoreGeneric,
+	    options.ignoreHistory
 	    );
   }
 
@@ -534,6 +544,21 @@ public static void printScore(Map<String
 		    );
 		builder.add(genericAnnotator);
     }
+    
+    // 2/20/13 srh adding
+    if (!options.ignoreHistory) {
+    	AnalysisEngineDescription historyAnnotator = AnalysisEngineFactory.createPrimitiveDescription(HistoryCleartkAnalysisEngine.class);
+    	ConfigurationParameterFactory.addConfigurationParameters(
+    			historyAnnotator,
+    			AssertionCleartkAnalysisEngine.PARAM_GOLD_VIEW_NAME,
+    			AssertionEvaluation.GOLD_VIEW_NAME,
+    			CleartkAnnotator.PARAM_DATA_WRITER_FACTORY_CLASS_NAME,
+    			this.dataWriterFactoryClass.getName(),
+    			DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
+    			new File(directory, "historyOf").getPath()
+    			);
+    	builder.add(historyAnnotator);
+    }
 
 /*
     AnalysisEngineDescription classifierAnnotator = AnalysisEngineFactory.createPrimitiveDescription(
@@ -606,6 +631,7 @@ public static void printScore(Map<String
     AnnotationStatistics uncertaintyStats = new AnnotationStatistics();
     AnnotationStatistics subjectStats = new AnnotationStatistics();
     AnnotationStatistics genericStats = new AnnotationStatistics();
+    AnnotationStatistics historyStats = new AnnotationStatistics();	// srh 3/6/13
     
     Map<String, AnnotationStatistics> map = new TreeMap<String, AnnotationStatistics>();

     if (!options.ignorePolarity)
@@ -632,6 +658,12 @@ public static void printScore(Map<String
     {
       map.put("generic", genericStats);
     }
+    
+    // srh 3/6/13
+    if (!options.ignoreHistory)
+    {
+    	map.put("historyOf", historyStats);
+    }
 
     for (JCas jCas : new JCasIterable(collectionReader, aggregate)) {
       JCas goldView;
@@ -707,6 +739,15 @@ public static void printScore(Map<String
 	    	  printErrors(jCas, goldEntitiesAndEvents, systemEntitiesAndEvents, "generic", CONST.NE_GENERIC_TRUE, Boolean.class);
 	      }
       }
+      
+      // srh 3/6/13
+      if (!options.ignoreHistory)
+      {
+    	  historyStats.add(goldEntitiesAndEvents, systemEntitiesAndEvents,
+    			  AnnotationStatistics.<IdentifiedAnnotation>annotationToSpan(),
+    			  AnnotationStatistics.<IdentifiedAnnotation>annotationToFeatureValue("historyOf"));
+      }
+      
     }
     return map;
   }
@@ -803,6 +844,7 @@ public static class HashableAnnotation i
         result = (this.getClass() == other.getClass() && this.begin == other.begin
             && this.end == other.end);
       }
+      
       return result;
     }
 

Added: incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/HistoryCleartkAnalysisEngine.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/HistoryCleartkAnalysisEngine.java?rev=1459553&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/HistoryCleartkAnalysisEngine.java (added)
+++ incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/HistoryCleartkAnalysisEngine.java Thu Mar 21 22:18:53 2013
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.assertion.medfacts.cleartk;
+
+import java.util.ArrayList;
+
+import org.apache.ctakes.assertion.attributes.features.GenericFeaturesExtractor;
+import org.apache.ctakes.assertion.attributes.features.HistoryFeaturesExtractor;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.cleartk.classifier.Instance;
+import org.cleartk.classifier.feature.extractor.ContextExtractor;
+
+public class HistoryCleartkAnalysisEngine extends
+		AssertionCleartkAnalysisEngine {
+
+	boolean USE_DEFAULT_EXTRACTORS = false;
+	
+	@Override
+	public void initialize(UimaContext context) throws ResourceInitializationException {
+		super.initialize(context);
+		probabilityOfKeepingADefaultExample = 0.1;
+	
+//		if (this.isTraining() && this.goldViewName == null) {
+//			throw new IllegalArgumentException(PARAM_GOLD_VIEW_NAME + " must be defined during training");
+//		}
+		
+//		if (USE_DEFAULT_EXTRACTORS) {
+//			super.initialize(context);
+//		} else {
+			initialize_history_extractor();
+//		}
+
+	}
+
+	private void initialize_history_extractor() {
+		
+		if (this.contextFeatureExtractors==null) {
+			this.contextFeatureExtractors = new ArrayList<ContextExtractor<IdentifiedAnnotation>>();
+		}
+		this.contextFeatureExtractors.add( 
+				new ContextExtractor<IdentifiedAnnotation>(
+						IdentifiedAnnotation.class, new HistoryFeaturesExtractor()) );
+				
+	}
+	
+	@Override
+	public void setClassLabel(IdentifiedAnnotation entityMention,
+			Instance<String> instance) throws AnalysisEngineProcessException {
+		if (this.isTraining())
+	      {
+	        String generic = entityMention.getGeneric()? "1":"0";
+
+	        // downsampling. initialize probabilityOfKeepingADefaultExample to 1.0 for no downsampling
+	        if ("0".equals(generic) 
+	        		&& coin.nextDouble() >= this.probabilityOfKeepingADefaultExample) {
+	        	return;
+	        }
+	        instance.setOutcome(generic);
+	        this.dataWriter.write(instance);
+	      } else
+	      {
+	        String label = this.classifier.classify(instance.getFeatures());
+	        entityMention.setGeneric("1".equals(label));
+	      }
+	}
+
+}

Propchange: incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/HistoryCleartkAnalysisEngine.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Modified: incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/pipelines/GoldEntityAndAttributeReaderPipelineForSeedCorpus.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/pipelines/GoldEntityAndAttributeReaderPipelineForSeedCorpus.java?rev=1459553&r1=1459552&r2=1459553&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/pipelines/GoldEntityAndAttributeReaderPipelineForSeedCorpus.java (original)
+++ incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/pipelines/GoldEntityAndAttributeReaderPipelineForSeedCorpus.java Thu Mar 21 22:18:53 2013
@@ -148,7 +148,7 @@ public class GoldEntityAndAttributeReade
 			AnalysisEngineDescription goldAnnotator = AnalysisEngineFactory.createPrimitiveDescription(
 					SHARPKnowtatorXMLReader.class,
 					typeSystemDescription,
-					"TextURI",
+					"TextDirectory", // 3/13/13 halgrim changed from "TextURI" trying to work with new SHARPKnowtatorXMLReader.java
 					//"/work/medfacts/sharp/data/2012-10-16_full_data_set_updated/Seed_Corpus/sandbox/batch02_mayo/knowtator/"
 					textDirectory.toString() + "/"
 			);

Modified: incubator/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/SHARPKnowtatorXMLReader.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/SHARPKnowtatorXMLReader.java?rev=1459553&r1=1459552&r2=1459553&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/SHARPKnowtatorXMLReader.java (original)
+++ incubator/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/SHARPKnowtatorXMLReader.java Thu Mar 21 22:18:53 2013
@@ -157,11 +157,16 @@ public class SHARPKnowtatorXMLReader ext
     if (this.textDirectory != null) {
       textPath = this.textDirectory + File.separator + textPath;
     }
-    try {
-      return new URI(textPath);
-    } catch (URISyntaxException e) {
-      throw new AnalysisEngineProcessException(e);
-    }
+
+    // srh changing to try to get to work on Windows
+//    try {
+    File tmpFile = new File(textPath);
+    URI answer = tmpFile.toURI();
+    return answer;
+      // return new URI(textPath);
+//    } catch (URISyntaxException e) {
+//      throw new AnalysisEngineProcessException(e);
+//    }
   }
   
   /**



Mime
View raw message