ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From tm...@apache.org
Subject svn commit: r1469380 [4/4] - in /ctakes/trunk: ctakes-assertion-res/src/main/resources/org/apache/ctakes/assertion/models/ ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/ ctakes-assertion/src/main/java/org/apache/ctakes/ass...
Date Thu, 18 Apr 2013 15:43:36 GMT
Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/AssertionCleartkAnalysisEngine.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/AssertionCleartkAnalysisEngine.java?rev=1469380&r1=1469379&r2=1469380&view=diff
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/AssertionCleartkAnalysisEngine.java (original)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/AssertionCleartkAnalysisEngine.java Thu Apr 18 15:43:35 2013
@@ -21,14 +21,18 @@ package org.apache.ctakes.assertion.medf
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
-import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 import java.util.Random;
 
-import org.apache.uima.jcas.tcas.Annotation;
-
-import org.apache.log4j.Level;
+import org.apache.ctakes.assertion.zoner.types.Zone;
+import org.apache.ctakes.typesystem.type.structured.DocumentID;
+import org.apache.ctakes.typesystem.type.syntax.BaseToken;
+import org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode;
+import org.apache.ctakes.typesystem.type.temporary.assertion.AssertionCuePhraseAnnotation;
+import org.apache.ctakes.typesystem.type.textsem.EntityMention;
+import org.apache.ctakes.typesystem.type.textsem.EventMention;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
 import org.apache.log4j.Logger;
 import org.apache.uima.UimaContext;
 import org.apache.uima.analysis_engine.AnalysisEngineDescription;
@@ -37,18 +41,13 @@ import org.apache.uima.cas.CASException;
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.tcas.Annotation;
 import org.apache.uima.resource.ResourceInitializationException;
-//import org.chboston.cnlp.ctakes.relationextractor.ae.ModifierExtractorAnnotator;
 import org.cleartk.classifier.CleartkAnnotator;
-import org.cleartk.classifier.CleartkAnnotatorDescriptionFactory;
-import org.cleartk.classifier.CleartkSequenceAnnotator;
 import org.cleartk.classifier.Feature;
 import org.cleartk.classifier.Instance;
-import org.cleartk.classifier.feature.extractor.CleartkExtractorException;
+import org.cleartk.classifier.feature.extractor.CleartkExtractor;
 import org.cleartk.classifier.feature.extractor.ContextExtractor;
-import org.cleartk.classifier.feature.extractor.ContextExtractor.Covered;
-import org.cleartk.classifier.feature.extractor.ContextExtractor.Preceding;
 import org.cleartk.classifier.feature.extractor.ContextExtractor.Following;
-import org.cleartk.classifier.feature.extractor.CleartkExtractor;
+import org.cleartk.classifier.feature.extractor.ContextExtractor.Preceding;
 import org.cleartk.classifier.feature.extractor.simple.CombinedExtractor;
 import org.cleartk.classifier.feature.extractor.simple.CoveredTextExtractor;
 import org.cleartk.classifier.feature.extractor.simple.SimpleFeatureExtractor;
@@ -59,27 +58,11 @@ import org.cleartk.classifier.feature.pr
 import org.cleartk.classifier.feature.proliferate.LowerCaseProliferator;
 import org.cleartk.classifier.feature.proliferate.NumericTypeProliferator;
 import org.cleartk.classifier.feature.proliferate.ProliferatingExtractor;
-import org.cleartk.classifier.opennlp.DefaultMaxentDataWriterFactory;
-import org.cleartk.classifier.opennlp.MaxentDataWriterFactory_ImplBase;
-import org.cleartk.type.test.Token;
-import org.cleartk.classifier.Feature;
 import org.uimafit.descriptor.ConfigurationParameter;
 import org.uimafit.factory.AnalysisEngineFactory;
 import org.uimafit.factory.ConfigurationParameterFactory;
 import org.uimafit.util.JCasUtil;
-
-import org.apache.commons.lang.StringUtils;
-
-import org.apache.ctakes.assertion.zoner.types.Zone;
-import org.apache.ctakes.typesystem.type.structured.DocumentID;
-import org.apache.ctakes.typesystem.type.syntax.BaseToken;
-import org.apache.ctakes.typesystem.type.temporary.assertion.AssertionCuePhraseAnnotation;
-import org.apache.ctakes.typesystem.type.textsem.EntityMention;
-import org.apache.ctakes.typesystem.type.textsem.EventMention;
-import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
-import org.apache.ctakes.typesystem.type.textspan.Sentence;
-
-import org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode;
+//import org.chboston.cnlp.ctakes.relationextractor.ae.ModifierExtractorAnnotator;
 
 public abstract class AssertionCleartkAnalysisEngine extends
     CleartkAnnotator<String>
@@ -155,17 +138,8 @@ public abstract class AssertionCleartkAn
     // a list of feature extractors that require only the token:
     // the stem of the word, the text of the word itself, plus
     // features created from the word text like character ngrams
-    this.entityFeatureExtractors = Arrays.asList(
-        new CoveredTextExtractor(),
-        //new TypePathExtractor(IdentifiedAnnotation.class, "stem"),
-        new ProliferatingExtractor(
-            new SpannedTextExtractor(),
-            new LowerCaseProliferator(),    
-            new CapitalTypeProliferator(),
-            new NumericTypeProliferator(),
-            new CharacterNGramProliferator(fromRight, 0, 2),
-            new CharacterNGramProliferator(fromRight, 0, 3)));
-
+    this.entityFeatureExtractors = new ArrayList<SimpleFeatureExtractor>();
+    
     // a list of feature extractors that require the token and the sentence
     this.contextFeatureExtractors = new ArrayList<ContextExtractor<IdentifiedAnnotation>>();
     
@@ -356,11 +330,11 @@ public abstract class AssertionCleartkAn
 
 
         
-      /*
+      
       for (SimpleFeatureExtractor extractor : this.entityFeatureExtractors) {
-        instance.addAll(extractor.extract(identifiedAnnotationView, entityMention));
+        instance.addAll(extractor.extract(jCas, entityMention));
       }
-      */
+      
       
       List<Feature> zoneFeatures = extractZoneFeatures(coveringZoneMap, entityMention);
       if (zoneFeatures != null && !zoneFeatures.isEmpty())

Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/GenericCleartkAnalysisEngine.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/GenericCleartkAnalysisEngine.java?rev=1469380&r1=1469379&r2=1469380&view=diff
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/GenericCleartkAnalysisEngine.java (original)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/GenericCleartkAnalysisEngine.java Thu Apr 18 15:43:35 2013
@@ -21,12 +21,14 @@ package org.apache.ctakes.assertion.medf
 import java.util.ArrayList;
 
 import org.apache.ctakes.assertion.attributes.features.GenericFeaturesExtractor;
+import org.apache.ctakes.assertion.medfacts.cleartk.extractors.ContextWordWindowExtractor;
 import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
 import org.apache.uima.UimaContext;
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
 import org.apache.uima.resource.ResourceInitializationException;
 import org.cleartk.classifier.Instance;
 import org.cleartk.classifier.feature.extractor.ContextExtractor;
+import org.cleartk.classifier.feature.extractor.simple.SimpleFeatureExtractor;
 
 public class GenericCleartkAnalysisEngine extends
 		AssertionCleartkAnalysisEngine {
@@ -50,7 +52,7 @@ public class GenericCleartkAnalysisEngin
 
 	}
 
-	private void initialize_generic_extractor() {
+	private void initialize_generic_extractor() throws ResourceInitializationException {
 		
 		if (this.contextFeatureExtractors==null) {
 			this.contextFeatureExtractors = new ArrayList<ContextExtractor<IdentifiedAnnotation>>();
@@ -58,7 +60,10 @@ public class GenericCleartkAnalysisEngin
 		this.contextFeatureExtractors.add( 
 				new ContextExtractor<IdentifiedAnnotation>(
 						IdentifiedAnnotation.class, new GenericFeaturesExtractor()) );
-				
+		if(this.entityFeatureExtractors == null){
+			this.entityFeatureExtractors = new ArrayList<SimpleFeatureExtractor>();
+		}
+		this.entityFeatureExtractors.add(new ContextWordWindowExtractor("org/apache/ctakes/assertion/models/generic.txt"));
 	}
 	
 	@Override

Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/HistoryCleartkAnalysisEngine.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/HistoryCleartkAnalysisEngine.java?rev=1469380&r1=1469379&r2=1469380&view=diff
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/HistoryCleartkAnalysisEngine.java (original)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/HistoryCleartkAnalysisEngine.java Thu Apr 18 15:43:35 2013
@@ -20,8 +20,8 @@ package org.apache.ctakes.assertion.medf
 
 import java.util.ArrayList;
 
-import org.apache.ctakes.assertion.attributes.features.GenericFeaturesExtractor;
 import org.apache.ctakes.assertion.attributes.features.HistoryFeaturesExtractor;
+import org.apache.ctakes.assertion.medfacts.cleartk.extractors.ContextWordWindowExtractor;
 import org.apache.ctakes.typesystem.type.constants.CONST;
 import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
 import org.apache.uima.UimaContext;
@@ -29,6 +29,7 @@ import org.apache.uima.analysis_engine.A
 import org.apache.uima.resource.ResourceInitializationException;
 import org.cleartk.classifier.Instance;
 import org.cleartk.classifier.feature.extractor.ContextExtractor;
+import org.cleartk.classifier.feature.extractor.simple.SimpleFeatureExtractor;
 
 public class HistoryCleartkAnalysisEngine extends
 		AssertionCleartkAnalysisEngine {
@@ -52,7 +53,7 @@ public class HistoryCleartkAnalysisEngin
 
 	}
 
-	private void initialize_history_extractor() {
+	private void initialize_history_extractor() throws ResourceInitializationException {
 		
 		if (this.contextFeatureExtractors==null) {
 			this.contextFeatureExtractors = new ArrayList<ContextExtractor<IdentifiedAnnotation>>();
@@ -60,7 +61,11 @@ public class HistoryCleartkAnalysisEngin
 		this.contextFeatureExtractors.add( 
 				new ContextExtractor<IdentifiedAnnotation>(
 						IdentifiedAnnotation.class, new HistoryFeaturesExtractor()) );
-				
+		
+		if(this.entityFeatureExtractors == null){
+			this.entityFeatureExtractors = new ArrayList<SimpleFeatureExtractor>();
+		}
+		this.entityFeatureExtractors.add(new ContextWordWindowExtractor("org/apache/ctakes/assertion/models/history.txt"));
 	}
 	
 	@Override

Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/PolarityCleartkAnalysisEngine.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/PolarityCleartkAnalysisEngine.java?rev=1469380&r1=1469379&r2=1469380&view=diff
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/PolarityCleartkAnalysisEngine.java (original)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/PolarityCleartkAnalysisEngine.java Thu Apr 18 15:43:35 2013
@@ -18,11 +18,16 @@
  */
 package org.apache.ctakes.assertion.medfacts.cleartk;
 
+import java.util.ArrayList;
+
+import org.apache.ctakes.assertion.medfacts.cleartk.extractors.ContextWordWindowExtractor;
+import org.apache.ctakes.assertion.medfacts.cleartk.extractors.NegationDependencyFeatureExtractor;
 import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
-import org.cleartk.classifier.Instance;
 import org.apache.uima.UimaContext;
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
 import org.apache.uima.resource.ResourceInitializationException;
+import org.cleartk.classifier.Instance;
+import org.cleartk.classifier.feature.extractor.simple.SimpleFeatureExtractor;
 
 
 public class PolarityCleartkAnalysisEngine extends AssertionCleartkAnalysisEngine {
@@ -31,6 +36,12 @@ public class PolarityCleartkAnalysisEngi
 	public void initialize(UimaContext context) throws ResourceInitializationException {
 		super.initialize(context);
 		probabilityOfKeepingADefaultExample = 0.1;
+		
+		if(this.entityFeatureExtractors == null){
+			this.entityFeatureExtractors = new ArrayList<SimpleFeatureExtractor>();
+		}
+		this.entityFeatureExtractors.add(new NegationDependencyFeatureExtractor());
+		this.entityFeatureExtractors.add(new ContextWordWindowExtractor("org/apache/ctakes/assertion/models/polarity.txt"));
 	}
 
 	public void setClassLabel(IdentifiedAnnotation entityMention, Instance<String> instance) throws AnalysisEngineProcessException {

Added: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/extractors/ContextWordWindowExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/extractors/ContextWordWindowExtractor.java?rev=1469380&view=auto
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/extractors/ContextWordWindowExtractor.java (added)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/extractors/ContextWordWindowExtractor.java Thu Apr 18 15:43:35 2013
@@ -0,0 +1,88 @@
+package org.apache.ctakes.assertion.medfacts.cleartk.extractors;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Scanner;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.ctakes.core.resource.FileLocator;
+import org.apache.ctakes.typesystem.type.syntax.BaseToken;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.cleartk.classifier.Feature;
+import org.cleartk.classifier.feature.extractor.CleartkExtractor;
+import org.cleartk.classifier.feature.extractor.CleartkExtractorException;
+import org.cleartk.classifier.feature.extractor.simple.SimpleFeatureExtractor;
+import org.uimafit.util.JCasUtil;
+
+public class ContextWordWindowExtractor implements SimpleFeatureExtractor {
+
+	private HashMap<String,Double> termVals = null;
+	private static final Pattern linePatt = Pattern.compile("^([^ ]+) : (.+)$");
+	private static final int WINDOW_SIZE = 5;
+	
+	public ContextWordWindowExtractor(String resourceFilename) throws ResourceInitializationException {
+		termVals = new HashMap<String,Double>();
+		try{
+			File termFile = FileLocator.locateFile(resourceFilename);
+			Scanner scanner = new Scanner(termFile);
+			Matcher m = null;
+			double max = 0.0;
+			double maxNeg = 0.0;
+			while(scanner.hasNextLine()){
+				String line = scanner.nextLine().trim();
+				m = linePatt.matcher(line);
+				if(m.matches()){
+					double val = Double.parseDouble(m.group(2));
+					termVals.put(m.group(1), val);
+					if(Math.abs(val) > max){
+						max = Math.abs(val);
+					}
+					if(val < maxNeg){
+						maxNeg = val;
+					}
+				}
+			}
+			max = max - maxNeg;
+			for(String key : termVals.keySet()){
+				termVals.put(key, (termVals.get(key)-maxNeg) / max);
+			}
+		}catch(IOException e){
+			throw new ResourceInitializationException();
+		}
+	}
+	
+	@Override
+	public List<Feature> extract(JCas view, Annotation focusAnnotation)
+			throws CleartkExtractorException {
+		ArrayList<Feature> feats = new ArrayList<Feature>();
+		List<BaseToken> precedingTokens = JCasUtil.selectPreceding(view, BaseToken.class, focusAnnotation, WINDOW_SIZE);
+		List<BaseToken> followingTokens = JCasUtil.selectFollowing(view, BaseToken.class, focusAnnotation, WINDOW_SIZE);
+		
+		double score = 0.0;
+		String key = null;
+		int ctxSize = 0;
+		for(int i = 0; i < precedingTokens.size(); i++){
+			key = precedingTokens.get(i).getCoveredText().toLowerCase();
+			if(termVals.containsKey(key)){
+				score += termVals.get(key);
+			}
+			ctxSize++;
+		}
+		for(int i = 0; i < followingTokens.size(); i++){
+			key = followingTokens.get(i).getCoveredText().toLowerCase();
+			if(termVals.containsKey(key)){
+				score += termVals.get(key);
+			}
+			ctxSize++;
+		}
+		score /= ctxSize;  // weight by actual amount of context so we don't penalize begin/end of sentence.
+		feats.add(new Feature("WORD_SCORE", score));
+		return feats;
+	}
+}

Added: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/extractors/NegationDependencyFeatureExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/extractors/NegationDependencyFeatureExtractor.java?rev=1469380&view=auto
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/extractors/NegationDependencyFeatureExtractor.java (added)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/extractors/NegationDependencyFeatureExtractor.java Thu Apr 18 15:43:35 2013
@@ -0,0 +1,55 @@
+package org.apache.ctakes.assertion.medfacts.cleartk.extractors;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.ctakes.assertion.util.NegationManualDepContextAnalyzer;
+import org.apache.ctakes.dependency.parser.util.DependencyUtility;
+import org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode;
+import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.cleartk.classifier.Feature;
+import org.cleartk.classifier.feature.extractor.CleartkExtractorException;
+import org.cleartk.classifier.feature.extractor.simple.SimpleFeatureExtractor;
+import org.uimafit.util.JCasUtil;
+
+public class NegationDependencyFeatureExtractor implements
+		SimpleFeatureExtractor {
+
+	NegationManualDepContextAnalyzer conAnal = null;
+
+	public NegationDependencyFeatureExtractor(){
+		conAnal = new NegationManualDepContextAnalyzer();
+	}
+	
+	@Override
+	public List<Feature> extract(JCas jcas, Annotation focusAnnotation)
+			throws CleartkExtractorException {
+		List<Feature> feats = new ArrayList<Feature>();
+		Sentence sent = null;
+		
+		List<Sentence> sents = JCasUtil.selectCovering(jcas, Sentence.class, focusAnnotation.getBegin(), focusAnnotation.getEnd());
+		if(sents != null && sents.size() > 0){
+			sent = sents.get(0);
+		}else{
+			return feats;
+		}
+		
+		List<ConllDependencyNode> nodes = DependencyUtility.getDependencyNodes(jcas, sent);
+		ConllDependencyNode headNode = DependencyUtility.getNominalHeadNode(jcas, focusAnnotation);
+		try {
+			boolean[] regexFeats = conAnal.findNegationContext(nodes, headNode);
+			for(int j = 0; j < regexFeats.length; j++){
+				if(regexFeats[j]){
+					feats.add(new Feature("NEG_DEP_REGEX_"+j));
+				}
+			}
+		} catch (Exception e) {
+			e.printStackTrace();
+			throw new CleartkExtractorException(e);
+		}
+		return feats;
+	}
+
+}

Added: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/util/NegationDepRegex.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/util/NegationDepRegex.java?rev=1469380&view=auto
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/util/NegationDepRegex.java (added)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/util/NegationDepRegex.java Thu Apr 18 15:43:35 2013
@@ -0,0 +1,526 @@
+/*
+ * Copyright: (c) 2011   Mayo Foundation for Medical Education and 
+ * Research (MFMER). All rights reserved. MAYO, MAYO CLINIC, and the
+ * triple-shield Mayo logo are trademarks and service marks of MFMER.
+ *
+ * Except as contained in the copyright notice above, or as used to identify 
+ * MFMER as the author of this software, the trade names, trademarks, service
+ * marks, or product names of the copyright holder shall not be used in
+ * advertising, promotion or otherwise in connection with this software without
+ * prior written authorization of the copyright holder.
+ * 
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0 
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and 
+ * limitations under the License. 
+ */
+package org.apache.ctakes.assertion.util;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.ctakes.dependency.parser.util.DependencyRegex;
+import static org.apache.ctakes.dependency.parser.util.DependencyRegex.*;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.apache.uima.resource.ResourceInitializationException;
+
+import com.googlecode.clearnlp.dependency.DEPLib;
+import static com.googlecode.clearnlp.dependency.DEPLib.*;
+import com.googlecode.clearnlp.dependency.DEPLibEn;
+import static com.googlecode.clearnlp.dependency.DEPLibEn.*;
+import com.googlecode.clearnlp.dependency.DEPNode;
+
+
+/**
+ * Uses one or more regular expressions to detect patterns in dependency paths.
+ * 
+ * @author Mayo Clinic
+ */
+/**
+ * @author m081914
+ *
+ */
+public class NegationDepRegex {
+
+	// regular modal verb
+	public Set<String> iv_modalVerbsSet = new HashSet<String>();
+	// negative particle
+	public Set<String> iv_negParticlesSet = new HashSet<String>();
+	// regular verbs requiring negation particle
+	public Set<String> iv_regVerbsSet = new HashSet<String>();
+	// negative verbs that contain negation in them
+	public Set<String> iv_negVerbsSet = new HashSet<String>();
+	// negation preposition
+	public Set<String> iv_negPrepositionsSet = new HashSet<String>();
+	// negatively charged determiners
+	public Set<String> iv_negDeterminersSet = new HashSet<String>();
+	// regular nouns - indicators
+	public Set<String> iv_regNounsSet = new HashSet<String>();
+	// regular prepositions
+	public Set<String> iv_regPrepositionsSet = new HashSet<String>();
+	// negative adjectives
+	public Set<String> iv_negAdjectivesSet = new HashSet<String>();
+	// negative collocations
+	public Set<String> iv_negCollocSet = new HashSet<String>();
+	// negative collocation particle
+	public Set<String> iv_negColPartSet = new HashSet<String>();
+	// conjunctions as POS
+	public Set<String> rel_conjunctSet = new HashSet<String>();
+	// copula
+	public Set<String> iv_copulaSet = new HashSet<String>();
+	// Union of negative determiners and prepositions
+	public Set<String> iv_negDetPlusPrepSet = new HashSet<String>(); 
+	
+	public Set<String> _boundaryWordSet;
+
+	// The regexes to be used
+	public List<DependencyRegex> regexSet = new ArrayList<DependencyRegex>();
+
+	
+	/**
+	 * Constructor.
+	 * Calls initialize() to set up the boundary words, the word sets and the regexes.
+	 */
+	public NegationDepRegex(){
+		initialize();
+	}
+
+	public void initialize(){
+		// Define some boundary conditions (consistent with old negation annotator)
+		initBoundaryData();
+
+		// Add all the sets of special words
+		initWordSets();
+		
+		// Add all the regexes possible
+		initRegexes();
+	}
+
+	private void initBoundaryData() {
+		_boundaryWordSet = new HashSet<String>();
+		_boundaryWordSet.add("but");
+		_boundaryWordSet.add("however");
+		_boundaryWordSet.add("nevertheless");
+		_boundaryWordSet.add("notwithstanding");
+		_boundaryWordSet.add("though");
+		_boundaryWordSet.add("although");
+		_boundaryWordSet.add("if");
+		_boundaryWordSet.add("when");
+		_boundaryWordSet.add("how");
+		_boundaryWordSet.add("what");
+		_boundaryWordSet.add("which");
+		_boundaryWordSet.add("while");
+		_boundaryWordSet.add("since");
+		_boundaryWordSet.add("then");
+		_boundaryWordSet.add("i");
+		_boundaryWordSet.add("he");
+		_boundaryWordSet.add("she");
+		_boundaryWordSet.add("they");
+		_boundaryWordSet.add("we");
+	
+		_boundaryWordSet.add(";");
+		_boundaryWordSet.add(":");
+		_boundaryWordSet.add(".");
+		_boundaryWordSet.add(")");
+	}
+
+	public boolean isBoundary(Annotation contextAnnotation, int scopeOrientation) throws AnalysisEngineProcessException {
+		String lcText = contextAnnotation.getCoveredText().toLowerCase();
+		return _boundaryWordSet.contains(lcText);
+	}
+
+	private void initWordSets() {
+		iv_modalVerbsSet.add("can");
+		iv_modalVerbsSet.add("ca");
+		iv_modalVerbsSet.add("will");
+		iv_modalVerbsSet.add("must");
+		iv_modalVerbsSet.add("could");
+		iv_modalVerbsSet.add("would");
+		iv_modalVerbsSet.add("should");
+		iv_modalVerbsSet.add("shall");
+		iv_modalVerbsSet.add("did");
+//		iv_modalVerbsSet.add("is");     // added for dependencyNeg
+//		iv_modalVerbsSet.add("was");    // added for dependencyNeg
+//		iv_modalVerbsSet.add("has");    // added for dependencyNeg
+//		iv_modalVerbsSet.add("had");    // added for dependencyNeg
+
+		iv_copulaSet.add("is");     // added for dependencyNeg
+		iv_copulaSet.add("was");    // added for dependencyNeg
+		iv_copulaSet.add("be");    // added for dependencyNeg
+		iv_copulaSet.add("are");     // added for dependencyNeg
+		iv_copulaSet.add("were");    // added for dependencyNeg
+		iv_copulaSet.add("be");    // added for dependencyNeg
+		
+		iv_negParticlesSet.add("not");
+		iv_negColPartSet.add("out");
+		iv_negParticlesSet.add("n't");
+		iv_negParticlesSet.add("'t");
+	
+		iv_negCollocSet.add("rule");
+		iv_negCollocSet.add("rules");
+		iv_negCollocSet.add("ruled");
+		iv_negCollocSet.add("ruling");
+		iv_negCollocSet.add("rule-out");
+	
+		iv_regVerbsSet.add("reveal");
+		iv_regVerbsSet.add("reveals");
+		iv_regVerbsSet.add("revealed");
+		iv_regVerbsSet.add("revealing");
+		iv_regVerbsSet.add("have");
+		iv_regVerbsSet.add("had");
+		iv_regVerbsSet.add("has");
+		iv_regVerbsSet.add("feel");
+		iv_regVerbsSet.add("feels");
+		iv_regVerbsSet.add("felt");
+		iv_regVerbsSet.add("feeling");
+		iv_regVerbsSet.add("complain");
+		iv_regVerbsSet.add("complains");
+		iv_regVerbsSet.add("complained");
+		iv_regVerbsSet.add("complaining");
+		iv_regVerbsSet.add("demonstrate");
+		iv_regVerbsSet.add("demonstrates");
+		iv_regVerbsSet.add("demonstrated");
+		iv_regVerbsSet.add("demonstrating");
+		iv_regVerbsSet.add("appear");
+		iv_regVerbsSet.add("appears");
+		iv_regVerbsSet.add("appeared");
+		iv_regVerbsSet.add("appearing");
+		iv_regVerbsSet.add("caused");
+		iv_regVerbsSet.add("cause");
+		iv_regVerbsSet.add("causing");
+		iv_regVerbsSet.add("causes");
+		iv_regVerbsSet.add("find");
+		iv_regVerbsSet.add("finds");
+		iv_regVerbsSet.add("found");
+		iv_regVerbsSet.add("discover");
+		iv_regVerbsSet.add("discovered");
+		iv_regVerbsSet.add("discovers");
+//		iv_regVerbsSet.add("show");     // added for dependencyNeg
+//		iv_regVerbsSet.add("shows");	// added for dependencyNeg
+//		iv_regVerbsSet.add("showed");	// added for dependencyNeg
+//		iv_regVerbsSet.add("showwing");	// added for dependencyNeg
+//		iv_regVerbsSet.add("indicate");     // added for dependencyNeg
+//		iv_regVerbsSet.add("indicates");	// added for dependencyNeg
+//		iv_regVerbsSet.add("indicated");	// added for dependencyNeg
+//		iv_regVerbsSet.add("indicating");	// added for dependencyNeg
+//		iv_regVerbsSet.add("include");		// added for dependencyNeg
+//		iv_regVerbsSet.add("includes");		// added for dependencyNeg
+//		iv_regVerbsSet.add("included");		// added for dependencyNeg
+//		iv_regVerbsSet.add("including");	// added for dependencyNeg
+	
+		iv_negVerbsSet.add("deny");
+		iv_negVerbsSet.add("denies");
+		iv_negVerbsSet.add("denied");
+		iv_negVerbsSet.add("denying");
+		iv_negVerbsSet.add("fail");
+		iv_negVerbsSet.add("fails");
+		iv_negVerbsSet.add("failed");
+		iv_negVerbsSet.add("failing");
+		iv_negVerbsSet.add("decline");
+		iv_negVerbsSet.add("declines");
+		iv_negVerbsSet.add("declined");
+		iv_negVerbsSet.add("declining");
+		iv_negVerbsSet.add("exclude");
+		iv_negVerbsSet.add("excludes");
+		iv_negVerbsSet.add("excluding");
+		iv_negVerbsSet.add("excluded");
+//		iv_regVerbsSet.add("contraindicate");     // added for dependencyNeg
+//		iv_regVerbsSet.add("contraindicates");	// added for dependencyNeg
+//		iv_regVerbsSet.add("contraindicated");	// added for dependencyNeg
+//		iv_regVerbsSet.add("contraindicating");	// added for dependencyNeg
+//		iv_regVerbsSet.add("contra-indicate");     // added for dependencyNeg
+//		iv_regVerbsSet.add("contra-indicates");	// added for dependencyNeg
+//		iv_regVerbsSet.add("contra-indicated");	// added for dependencyNeg
+//		iv_regVerbsSet.add("contra-indicating");	// added for dependencyNeg
+	
+		iv_negPrepositionsSet.add("without");
+		iv_negPrepositionsSet.add("absent"); //removed for dependencyNeg
+		iv_negPrepositionsSet.add("none");   //removed for dependencyNeg
+	
+		iv_negDeterminersSet.add("no");
+		iv_negDeterminersSet.add("any");  //removed for dependencyNeg
+		iv_negDeterminersSet.add("neither");
+		iv_negDeterminersSet.add("nor");
+		iv_negDeterminersSet.add("never");
+	
+		iv_regNounsSet.add("evidence");
+		iv_regNounsSet.add("indication");
+		iv_regNounsSet.add("indications");
+		iv_regNounsSet.add("sign");
+		iv_regNounsSet.add("signs");
+		iv_regNounsSet.add("symptoms");
+		iv_regNounsSet.add("symptom");
+		iv_regNounsSet.add("sx");
+		iv_regNounsSet.add("dx");
+		iv_regNounsSet.add("diagnosis");
+		iv_regNounsSet.add("history");
+		iv_regNounsSet.add("hx");
+		iv_regNounsSet.add("findings");
+//		iv_regNounsSet.add("finding");		// added for dependencyNeg
+//		iv_regNounsSet.add("recurrence");	// added for dependencyNeg
+//		iv_regNounsSet.add("recurrences");	// added for dependencyNeg
+//		iv_regNounsSet.add("occurrence");	// added for dependencyNeg
+//		iv_regNounsSet.add("occurrences");	// added for dependencyNeg
+		     
+		iv_regPrepositionsSet.add("of");
+		iv_regPrepositionsSet.add("in");
+		iv_regPrepositionsSet.add("for");
+		iv_regPrepositionsSet.add("with");
+	
+		iv_negAdjectivesSet.add("unremarkable");
+		iv_negAdjectivesSet.add("unlikely");
+		iv_negAdjectivesSet.add("negative");
+//		iv_negAdjectivesSet.add("absent"); // added for dependencyNeg
+//		iv_negAdjectivesSet.add("none");   // added for dependencyNeg
+	
+//		rel_conjunctSet.add("CONJ");		// added for dependencyNeg
+		rel_conjunctSet.add(DEPLibEn.DEP_CONJ);
+//		rel_conjunctSet.add("COORD");		// added for dependencyNeg
+		rel_conjunctSet.add(DEPLibEn.DEP_CC);
+//		rel_conjunctSet.add("APPO");		// added for dependencyNeg
+		rel_conjunctSet.add(DEPLibEn.DEP_APPOS);
+//		rel_conjunctSet.add("P");			// added for dependencyNeg
+		rel_conjunctSet.add(DEPLibEn.DEP_PREP);
+		
+		iv_negDetPlusPrepSet = iv_negDeterminersSet;
+		iv_negDetPlusPrepSet.addAll(iv_negPrepositionsSet);
+	}
+
+	private void initRegexes() { // builds every negation rule below and adds it to regexSet
+		// Optional tail appended to every rule below: one node labelled from rel_conjunctSet, e.g. "<disease1>, <disease2>, or FOCUS".
+		DEPNode[] regnodes_NN_CONJ_NN = new DEPNode[1];
+		regnodes_NN_CONJ_NN[0]		= new DEPNode(DEPLib.NULL_ID, ANY_TOKEN);
+		regnodes_NN_CONJ_NN[0].setLabel(DependencyRegex.fromSet(rel_conjunctSet));
+		//regnodes_NN_CONJ_NN[1]        = new DEPNode();
+		//regnodes_NN_CONJ_NN[1].deprel = DependencyRegex.fromSet(rel_conjunctSet);
+
+		// Recognizes phrases like "denies <symptom1>"
+		DEPNode[] regnodes_VBNEG_OBJ = new DEPNode[2];
+		regnodes_VBNEG_OBJ[0]        = new DEPNode(DEPLib.NULL_ID, fromSet(iv_negVerbsSet));
+//		regnodes_VBNEG_OBJ[0].form   = DependencyRegex.fromSet(iv_negVerbsSet);
+		regnodes_VBNEG_OBJ[1]		 = new DEPNode(DEPLib.NULL_ID, ANY_TOKEN);
+		regnodes_VBNEG_OBJ[1].setLabel(DEPLibEn.DEP_DOBJ);  /* was "OBJ" in clearparser */
+		int cVBNEG_OBJ = 0; // each c* index is passed, plus one, to the DependencyRegex constructor further down
+		
+		// Recognizes phrases like "was not FOCUS"
+		DEPNode[] regnodes_PRT_VB_PRD = new DEPNode[3];
+		regnodes_PRT_VB_PRD[0]          = new DEPNode(DEPLib.NULL_ID, fromSet(iv_negParticlesSet));
+//		regnodes_PRT_VB_PRD[0].form     = DependencyRegex.fromSet(iv_negParticlesSet);
+		regnodes_PRT_VB_PRD[0].setLabel(DEPLibEn.DEP_ADV); // was: ("ADV");
+		regnodes_PRT_VB_PRD[1]          = new DEPNode(DEPLib.NULL_ID, fromSet(iv_copulaSet));
+//		regnodes_PRT_VB_PRD[1].form     = DependencyRegex.fromSet(iv_copulaSet);
+		regnodes_PRT_VB_PRD[2]          = new DEPNode(DEPLib.NULL_ID, ANY_TOKEN);
+		regnodes_PRT_VB_PRD[2].setLabel(DEPLibEn.DEP_ACOMP); // was: ("PRD");
+		regnodes_PRT_VB_PRD[2].pos      = DependencyRegex.ANY_ADJECTIVE;
+		int cPRT_VB_PRD = 1;
+		
+		// Recognizes phrases like "did not find FOCUS"
+		DEPNode[] regnodes_PRT_MOD_VB_OBJ = new DEPNode[4];
+		regnodes_PRT_MOD_VB_OBJ[0]          = new DEPNode(DEPLib.NULL_ID,fromSet(iv_negParticlesSet));
+//		regnodes_PRT_MOD_VB_OBJ[0].form     = DependencyRegex.fromSet(iv_negParticlesSet);
+		regnodes_PRT_MOD_VB_OBJ[0].setLabel(DEPLibEn.DEP_ADV); // was: ("ADV");
+		regnodes_PRT_MOD_VB_OBJ[1]          = new DEPNode(DEPLib.NULL_ID,fromSet(iv_modalVerbsSet));
+//		regnodes_PRT_MOD_VB_OBJ[1].form     = DependencyRegex.fromSet(iv_modalVerbsSet);
+		regnodes_PRT_MOD_VB_OBJ[2]          = new DEPNode(DEPLib.NULL_ID,fromSet(iv_regVerbsSet));
+//		regnodes_PRT_MOD_VB_OBJ[2].form     = DependencyRegex.fromSet(iv_regVerbsSet);
+//		regnodes_PRT_MOD_VB_OBJ[2].setLabel(DEPLibEn.DEP_CCOMP); // was: ("VC"); really a shot in the dark
+		regnodes_PRT_MOD_VB_OBJ[2].pos      = ANY_VERB;
+		regnodes_PRT_MOD_VB_OBJ[3]          = new DEPNode(DEPLib.NULL_ID,ANY_TOKEN);
+		regnodes_PRT_MOD_VB_OBJ[3].setLabel(DEPLibEn.DEP_DOBJ);  // was:("OBJ");
+		int cPRT_MOD_VB_OBJ = 1;
+		
+		// Recognizes phrases like "did not find evidence of FOCUS"
+		DEPNode[] regnodes_PRT_MOD_VB_OBJ_IN_PMOD = new DEPNode[6];
+		regnodes_PRT_MOD_VB_OBJ_IN_PMOD[0]          = new DEPNode(DEPLib.NULL_ID,fromSet(iv_negParticlesSet));
+//		regnodes_PRT_MOD_VB_OBJ_IN_PMOD[0].form     = DependencyRegex.fromSet(iv_negParticlesSet);
+		regnodes_PRT_MOD_VB_OBJ_IN_PMOD[0].setLabel(DEPLibEn.DEP_NEG); // was:("ADV"); NOTE(review): regnodes_PRT_VB_PRD and regnodes_PRT_MOD_VB_OBJ map "ADV" to DEP_ADV - confirm DEP_NEG is intended here
+		regnodes_PRT_MOD_VB_OBJ_IN_PMOD[1]          = new DEPNode(DEPLib.NULL_ID,fromSet(iv_modalVerbsSet));
+//		regnodes_PRT_MOD_VB_OBJ_IN_PMOD[1].form     = DependencyRegex.fromSet(iv_modalVerbsSet);
+		regnodes_PRT_MOD_VB_OBJ_IN_PMOD[1].setLabel(DEPLibEn.DEP_CCOMP); // was: ("VC");
+		regnodes_PRT_MOD_VB_OBJ_IN_PMOD[2]          = new DEPNode(DEPLib.NULL_ID,fromSet(iv_regVerbsSet));
+//		regnodes_PRT_MOD_VB_OBJ_IN_PMOD[2].form     = DependencyRegex.fromSet(iv_regVerbsSet);
+		regnodes_PRT_MOD_VB_OBJ_IN_PMOD[3]          = new DEPNode(DEPLib.NULL_ID,fromSet(iv_regNounsSet));
+//		regnodes_PRT_MOD_VB_OBJ_IN_PMOD[3].form     = DependencyRegex.fromSet(iv_regNounsSet);
+		regnodes_PRT_MOD_VB_OBJ_IN_PMOD[3].setLabel(DEPLibEn.DEP_DOBJ); //("OBJ");
+		regnodes_PRT_MOD_VB_OBJ_IN_PMOD[4]        = new DEPNode(DEPLib.NULL_ID,fromSet(iv_regPrepositionsSet));
+//		regnodes_PRT_MOD_VB_OBJ_IN_PMOD[4].form   = DependencyRegex.fromSet(iv_regPrepositionsSet);
+		regnodes_PRT_MOD_VB_OBJ_IN_PMOD[4].pos    = "IN";
+		regnodes_PRT_MOD_VB_OBJ_IN_PMOD[5]        = new DEPNode(DEPLib.NULL_ID,ANY_TOKEN);
+		regnodes_PRT_MOD_VB_OBJ_IN_PMOD[5].setLabel(DEPLibEn.DEP_POBJ); // was: ("PMOD");		
+		int cPRT_MOD_VB_OBJ_IN_PMOD = 1;
+		
+		// Recognizes phrases like "excluding FOCUS"
+		DEPNode[] regnodes_VBN_SBJ = new DEPNode[2];
+		regnodes_VBN_SBJ[0]        = new DEPNode(DEPLib.NULL_ID, fromSet(iv_negVerbsSet));
+//		regnodes_VBN_SBJ[0].form   = DependencyRegex.fromSet(iv_negVerbsSet);
+		regnodes_VBN_SBJ[0].pos    = "VBN";
+		regnodes_VBN_SBJ[1]		   = new DEPNode(DEPLib.NULL_ID, ANY_TOKEN);
+		regnodes_VBN_SBJ[1].setLabel(DEPLibEn.DEP_DOBJ); // was: ("SBJ");
+		int cVBN_SBJ = 0;
+		
+		// Recognizes phrases like "rules out FOCUS"
+		DEPNode[] regnodes_PRT_rule_OBJ = new DEPNode[3];
+		regnodes_PRT_rule_OBJ[0]          = new DEPNode(DEPLib.NULL_ID, fromSet(iv_negColPartSet));
+//		regnodes_PRT_rule_OBJ[0].form     = DependencyRegex.fromSet(iv_negColPartSet);
+		regnodes_PRT_rule_OBJ[0].setLabel(DEPLibEn.DEP_PRT); // was: ("PRT");
+		regnodes_PRT_rule_OBJ[1]          = new DEPNode(DEPLib.NULL_ID, fromSet(iv_negCollocSet));
+//		regnodes_PRT_rule_OBJ[1].form     = DependencyRegex.fromSet(iv_negCollocSet);
+		regnodes_PRT_rule_OBJ[2]          = new DEPNode(DEPLib.NULL_ID, ANY_TOKEN);
+		regnodes_PRT_rule_OBJ[2].setLabel(DEPLibEn.DEP_DOBJ); //("OBJ");
+		int cPRT_rule_OBJ = 1;
+		
+		//// Noun-ish rules
+		// Recognizes phrases like "no pain"
+		DEPNode[] regnodes_DT_NMOD   = new DEPNode[2];
+		regnodes_DT_NMOD[0]          = new DEPNode(NULL_ID, fromSet(iv_negDetPlusPrepSet));
+//		regnodes_DT_NMOD[0].form     = DependencyRegex.fromSet(iv_negDetPlusPrepSet);
+		regnodes_DT_NMOD[0].setLabel(DEP_DET); // was: ("NMOD");
+		regnodes_DT_NMOD[1]          = new DEPNode(NULL_ID, ANY_TOKEN);
+		regnodes_DT_NMOD[1].pos      = DependencyRegex.ANY_NOUN;
+		int cDT_NMOD = 1;
+
+		// Recognizes phrases like "without pain"
+		DEPNode[] regnodes_IN_PMOD   = new DEPNode[2];
+		regnodes_IN_PMOD[0]          = new DEPNode(NULL_ID, fromSet(iv_negDetPlusPrepSet));
+//		regnodes_IN_PMOD[0].form     = DependencyRegex.fromSet(iv_negDetPlusPrepSet);
+		regnodes_IN_PMOD[0].setLabel(DEP_PMOD); // was: ("PMOD"); NOTE(review): the other rules in this method translate "PMOD" to DEP_POBJ - confirm DEP_PMOD is intended here
+		regnodes_IN_PMOD[1]          = new DEPNode(NULL_ID, ANY_TOKEN);
+		regnodes_IN_PMOD[1].pos      = DependencyRegex.ANY_NOUN;
+		int cIN_PMOD = 0;
+		
+		// Recognizes phrases like "no history of FOCUS"
+		DEPNode[] regnodes_DT_NN_IN_PMOD = new DEPNode[4];
+		regnodes_DT_NN_IN_PMOD[0]        = new DEPNode(NULL_ID, fromSet(iv_negDetPlusPrepSet));
+//		regnodes_DT_NN_IN_PMOD[0].form   = DependencyRegex.fromSet(iv_negDetPlusPrepSet);
+//		regnodes_DT_NN_IN_PMOD[0].setLabel(DEP_PMOD + "|" + DEP_NMOD); // was: ("[NP]MOD");  // no convincing evidence that this can be restricted among the set of words listed above. (no would be det i think)
+		regnodes_DT_NN_IN_PMOD[1]        = new DEPNode(NULL_ID, fromSet(iv_regNounsSet));
+//		regnodes_DT_NN_IN_PMOD[1].form   = DependencyRegex.fromSet(iv_regNounsSet);
+		regnodes_DT_NN_IN_PMOD[2]        = new DEPNode(NULL_ID, fromSet(iv_regPrepositionsSet));
+//		regnodes_DT_NN_IN_PMOD[2].form   = DependencyRegex.fromSet(iv_regPrepositionsSet);
+		regnodes_DT_NN_IN_PMOD[2].pos    = "IN";
+		regnodes_DT_NN_IN_PMOD[3]        = new DEPNode(NULL_ID, ANY_TOKEN);
+		regnodes_DT_NN_IN_PMOD[3].setLabel(DEP_POBJ); // was: ("PMOD");
+		int cDT_NN_IN_PMOD = 1;
+		
+//		// Recognizes negative-adjective phrases like "is negative for carcinoma"
+//		DEPNode[] regnodes_JJNEG_COP_IN_PMOD = new DEPNode[4];
+//		regnodes_JJNEG_COP_IN_PMOD[0]        = new DEPNode();
+//		regnodes_JJNEG_COP_IN_PMOD[0].form   = DependencyRegex.fromSet(iv_negAdjectivesSet);
+//		regnodes_JJNEG_COP_IN_PMOD[1]        = new DEPNode();
+//		regnodes_JJNEG_COP_IN_PMOD[1].pos    = DependencyRegex.ANY_VERB; // banking that people don't use double negatives
+//		regnodes_JJNEG_COP_IN_PMOD[2]        = new DEPNode();
+//		regnodes_JJNEG_COP_IN_PMOD[2].form   = DependencyRegex.fromSet(iv_regPrepositionsSet);
+//		regnodes_JJNEG_COP_IN_PMOD[2].pos    = "IN";
+//		regnodes_JJNEG_COP_IN_PMOD[3]        = new DEPNode();
+//		regnodes_JJNEG_COP_IN_PMOD[3].deprel = "PMOD";
+//		int cJJNEG_COP_IN_PMOD = 1;
+
+		// Recognizes negative-adjective phrases like "negative for carcinoma"
+		DEPNode[] regnodes_JJNEG_AMOD_PMOD = new DEPNode[3];
+		regnodes_JJNEG_AMOD_PMOD[0]        = new DEPNode(NULL_ID, fromSet(iv_negAdjectivesSet));
+//		regnodes_JJNEG_AMOD_PMOD[0].form   = DependencyRegex.fromSet(iv_negAdjectivesSet);
+		regnodes_JJNEG_AMOD_PMOD[1]        = new DEPNode(NULL_ID, fromSet(iv_regPrepositionsSet));
+//		regnodes_JJNEG_AMOD_PMOD[1].form   = DependencyRegex.fromSet(iv_regPrepositionsSet);
+		regnodes_JJNEG_AMOD_PMOD[1].pos    = "IN";
+		regnodes_JJNEG_AMOD_PMOD[2]        = new DEPNode(NULL_ID, ANY_TOKEN);
+		regnodes_JJNEG_AMOD_PMOD[2].setLabel(DEP_POBJ); // was: ("PMOD");
+		int cJJNEG_AMOD_PMOD = 0;
+		
+		// Recognizes negative-adjective phrases like "unlikely to have carcinoma"
+		// FIXME - not sure this one works correctly -- this example parses weirdly in CVD.
+		DEPNode[] regnodes_JJNEG_AMOD_IM_OBJ = new DEPNode[4];
+		regnodes_JJNEG_AMOD_IM_OBJ[0]        = new DEPNode(NULL_ID, fromSet(iv_negAdjectivesSet));
+//		regnodes_JJNEG_AMOD_IM_OBJ[0].form   = DependencyRegex.fromSet(iv_negAdjectivesSet);
+		regnodes_JJNEG_AMOD_IM_OBJ[1]        = new DEPNode(NULL_ID, ANY_TOKEN);
+		regnodes_JJNEG_AMOD_IM_OBJ[1].setLabel(DEP_AMOD); // was: ("AMOD");
+		regnodes_JJNEG_AMOD_IM_OBJ[2]        = new DEPNode(NULL_ID, ANY_TOKEN);
+		regnodes_JJNEG_AMOD_IM_OBJ[2].setLabel(DEP_INFMOD); // was: ("IM");
+		regnodes_JJNEG_AMOD_IM_OBJ[3]        = new DEPNode(NULL_ID, ANY_TOKEN);
+		regnodes_JJNEG_AMOD_IM_OBJ[3].setLabel(DEP_DOBJ); // was: ("OBJ");
+		int cJJNEG_AMOD_IM_OBJ = 0;
+		
+		// Recognizes negative-adjective phrases like "unlikely carcinoma"
+		DEPNode[] regnodes_JJNEG_NN = new DEPNode[2];
+		regnodes_JJNEG_NN[0]        = new DEPNode(NULL_ID, fromSet(iv_negAdjectivesSet));
+//		regnodes_JJNEG_NN[0].form   = DependencyRegex.fromSet(iv_negAdjectivesSet);
+		regnodes_JJNEG_NN[1]        = new DEPNode(NULL_ID, ANY_TOKEN);
+		regnodes_JJNEG_NN[1].pos    = DependencyRegex.ANY_NOUN;
+		int cJJNEG_NN = 1;
+		
+		
+		// Add the verb-ish rules to regexSet, each with the optional conjunct tail appended
+		DependencyRegex regex_VBNEG_OBJ = 
+				(new DependencyRegex(regnodes_VBNEG_OBJ, cVBNEG_OBJ+1))
+					.appendOptional( regnodes_NN_CONJ_NN );
+		regexSet.add(regex_VBNEG_OBJ);
+		DependencyRegex regex_PRT_VB_PRD = 
+				(new DependencyRegex(regnodes_PRT_VB_PRD, cPRT_VB_PRD+1))
+					.appendOptional( regnodes_NN_CONJ_NN );
+		regexSet.add(regex_PRT_VB_PRD);
+		DependencyRegex regex_PRT_MOD_VB_OBJ = 
+				(new DependencyRegex(regnodes_PRT_MOD_VB_OBJ, cPRT_MOD_VB_OBJ+1))
+					.appendOptional( regnodes_NN_CONJ_NN );
+		regexSet.add(regex_PRT_MOD_VB_OBJ);
+		DependencyRegex regex_PRT_MOD_VB_OBJ_IN_PMOD = 
+				(new DependencyRegex(regnodes_PRT_MOD_VB_OBJ_IN_PMOD, cPRT_MOD_VB_OBJ_IN_PMOD+1))
+					.appendOptional( regnodes_NN_CONJ_NN );
+		regexSet.add(regex_PRT_MOD_VB_OBJ_IN_PMOD);
+		DependencyRegex regex_VBN_SBJ = 
+				(new DependencyRegex(regnodes_VBN_SBJ, cVBN_SBJ+1))
+					.appendOptional( regnodes_NN_CONJ_NN );
+		regexSet.add(regex_VBN_SBJ);
+		
+		DependencyRegex regex_PRT_rule_OBJ = 
+				(new DependencyRegex(regnodes_PRT_rule_OBJ, cPRT_rule_OBJ+1))
+					.appendOptional( regnodes_NN_CONJ_NN );
+		regexSet.add(regex_PRT_rule_OBJ);
+
+		// Add the noun-ish rules to regexSet, each with the optional conjunct tail appended
+		DependencyRegex regex_DT_NMOD = 
+				(new DependencyRegex(regnodes_DT_NMOD, cDT_NMOD+1))
+					.appendOptional( regnodes_NN_CONJ_NN );
+		regexSet.add(regex_DT_NMOD);
+		DependencyRegex regex_IN_PMOD = 
+				(new DependencyRegex(regnodes_IN_PMOD, cIN_PMOD+1))
+					.appendOptional( regnodes_NN_CONJ_NN );
+		regexSet.add(regex_IN_PMOD);
+		DependencyRegex regex_DT_NN_IN_PMOD = 
+				(new DependencyRegex(regnodes_DT_NN_IN_PMOD, cDT_NN_IN_PMOD+1))
+					.appendOptional( regnodes_NN_CONJ_NN );
+		regexSet.add(regex_DT_NN_IN_PMOD);
+		DependencyRegex regex_JJNEG_AMOD_PMOD = 
+				(new DependencyRegex(regnodes_JJNEG_AMOD_PMOD, cJJNEG_AMOD_PMOD+1))
+					.appendOptional( regnodes_NN_CONJ_NN );
+		regexSet.add(regex_JJNEG_AMOD_PMOD);
+		DependencyRegex regex_JJNEG_AMOD_IM_OBJ = 
+				(new DependencyRegex(regnodes_JJNEG_AMOD_IM_OBJ, cJJNEG_AMOD_IM_OBJ+1))
+					.appendOptional( regnodes_NN_CONJ_NN );
+		regexSet.add(regex_JJNEG_AMOD_IM_OBJ);
+		DependencyRegex regex_JJNEG_NN = 
+				(new DependencyRegex(regnodes_JJNEG_NN, cJJNEG_NN+1))
+					.appendOptional( regnodes_NN_CONJ_NN );
+		regexSet.add(regex_JJNEG_NN);
+//		DependencyRegex regex_INNEG_NN = (
+//				new DependencyRegex(regnodes_INNEG_NN, cINNEG_NN+1)).appendOptional( regnodes_NN_CONJ_NN );
+//		regexSet.add(regex_INNEG_NN);
+	
+		// Debug dump: writes every regex in regexSet to stdout each time this method runs.
+		System.out.println("### here are the regexes");
+		for (DependencyRegex dreg : regexSet) {
+			System.out.println(dreg.toString());
+		}
+		
+	}
+
+
+}
\ No newline at end of file

Added: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/util/NegationManualDepContextAnalyzer.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/util/NegationManualDepContextAnalyzer.java?rev=1469380&view=auto
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/util/NegationManualDepContextAnalyzer.java (added)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/util/NegationManualDepContextAnalyzer.java Thu Apr 18 15:43:35 2013
@@ -0,0 +1,158 @@
+/*
+ * Copyright: (c) 2011   Mayo Foundation for Medical Education and 
+ * Research (MFMER). All rights reserved. MAYO, MAYO CLINIC, and the
+ * triple-shield Mayo logo are trademarks and service marks of MFMER.
+ *
+ * Except as contained in the copyright notice above, or as used to identify 
+ * MFMER as the author of this software, the trade names, trademarks, service
+ * marks, or product names of the copyright holder shall not be used in
+ * advertising, promotion or otherwise in connection with this software without
+ * prior written authorization of the copyright holder.
+ * 
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0 
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and 
+ * limitations under the License. 
+ */
+package org.apache.ctakes.assertion.util;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.ctakes.dependency.parser.util.DependencyPath;
+import org.apache.ctakes.dependency.parser.util.DependencyUtility;
+import org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode;
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.jcas.JCas;
+
+import clear.dep.DepNode;
+
+//import edu.mayo.bmi.fsm.output.NegationIndicator;
+//import edu.mayo.bmi.nlp.parser.util.ClearDependencyUtility;
+//import edu.mayo.bmi.nlp.parser.util.DependencyPath;
+//import edu.mayo.bmi.nlp.parser.util.DependencyRegex;
+//import edu.mayo.bmi.nlp.parser.util.DependencyUtility;
+//import edu.mayo.bmi.uima.context.ContextHit;
+//import edu.mayo.bmi.uima.core.type.syntax.ConllDependencyNode;
+
+
+/**
+ * Matches dependency paths between the nodes of a sentence and a focus node
+ * against the regexes held by a NegationDepRegex, reporting which ones matched.
+ * 
+ * @author Mayo Clinic
+ */
+public class NegationManualDepContextAnalyzer {
+
+	private NegationDepRegex regexes;
+	
+	public NegationManualDepContextAnalyzer(){
+		regexes = new NegationDepRegex();		
+	}
+	
+/*	public void initialize(UimaContext annotatorContext) throws ResourceInitializationException {
+		// Initialize all the regex that will be used
+
+	}
+*/
+	public boolean isBoundary(Annotation contextAnnotation, int scopeOrientation) throws AnalysisEngineProcessException {
+		String lcText = contextAnnotation.getCoveredText().toLowerCase(); // no-arg toLowerCase() uses the default locale
+		return regexes._boundaryWordSet.contains(lcText); // scopeOrientation is not consulted
+	}
+
+	/**
+	 * Describes the disabled analyzeContext method kept in the comment below:
+	 * it analyzed a sentence for negation context based on dependency paths.
+	 */
+/*	public ContextHit analyzeContext(List<ConllDependencyNode> nodes, ConllDependencyNode focus)
+			throws AnalysisEngineProcessException {
+		
+		try {
+			Set<NegationIndicator> s = findNegationContext(nodes,focus); 
+			
+			if (s.size() > 0) {
+				NegationIndicator neg = s.iterator().next();
+				return new ContextHit(neg.getStartOffset(), neg.getEndOffset());
+			} else {
+				return null;
+			}
+		} catch (Exception e) {
+			throw new AnalysisEngineProcessException(e);
+		}
+	}
+*/	
+	/**
+	 * Tests the dependency path from every node to the focus node against the regexes.
+	 * 
+	 * @param nodes nodes of the sentence; each one is tried as the start of a path
+	 * @param focus node that every path leads to
+	 * @return one flag per regex, in regexSet order; true where that regex was the first match for some path
+	 * @throws Exception declared by the signature; no throw statement appears in this method body
+	 */
+	public boolean[] findNegationContext(List<ConllDependencyNode> nodes, ConllDependencyNode focus) throws Exception {
+		List<ConllDependencyNode> hits = new ArrayList<ConllDependencyNode>(); // only referenced by the commented-out code below
+//		List<String> feats = new ArrayList<String>();
+		boolean[] feats = new boolean[regexes.regexSet.size()];
+		
+		// Print out the sentence for testing
+//		System.out.print("*** in findNegationContext; sentence is: ");
+//		for (ConllDependencyNode n : nodes) {
+//			System.out.print(n.getCoveredText()+" ");
+//		}
+//		System.out.println();
+		
+		// Try every node in the sentence as the start of a path to the focus node
+		for (int i=0; i<nodes.size(); i++) {
+			ConllDependencyNode hypNegNode = nodes.get(i);
+//			System.out.println("Node "+i+": {"+hypNegNode.getFORM()+"}"+hypNegNode.getPOSTAG()+">"+hypNegNode.getDEPREL());
+			DependencyPath path = DependencyUtility.getPath(nodes, hypNegNode, focus);
+			if(path == null) continue; // no path was returned for this node, so skip it
+			int featMatchInd = findPathMatches(path);
+			if ( featMatchInd != -1) {
+//				hits.add(hypNegNode);
+				feats[featMatchInd] = true;
+			}
+		}
+		return feats;
+//		System.out.println("=== in findNegationContext; found "+hits.size()+" negations in sentence");
+		
+		// Iterate through all the node hits and convert to NegationIndicators
+//		Set<NegationIndicator> negHits = new HashSet<NegationIndicator>();
+//		for (int i=0; i<hits.size(); i++) {
+//			NegationIndicator negInd = new NegationIndicator(hits.get(i).getBegin(), hits.get(i).getEnd());
+//			negHits.add(negInd);
+//		}
+//		return negHits;
+	}
+
+	private int findPathMatches(DependencyPath path) {
+		// Returns the index of the first regex in regexSet that matches path.toString(),
+		// or -1 when none matches; regexes after the first match are not tried.
+//		System.out.println(path.toString());
+		for (int i=0; i<regexes.regexSet.size(); i++) {
+			if (regexes.regexSet.get(i).matches( path.toString() )) {
+//				System.out.println("  Regex: "+regexes.regexSet.get(i).toString()+"\n  "
+//						+regexes.regexSet.get(i).matches( path.toString() ));
+				return i;
+			}
+		}
+		return -1;
+
+	}
+
+	public int getNumFeatures(){
+		return regexes.regexSet.size(); // same value used to size the array returned by findNegationContext
+	}
+}
\ No newline at end of file



Mime
View raw message