ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From tm...@apache.org
Subject svn commit: r1727990 - in /ctakes/trunk: ctakes-coreference-res/src/main/resources/org/apache/ctakes/coreference/ ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/ ctakes-coreference/src/main/java/org/apache/ctakes/coreference/util/
Date Mon, 01 Feb 2016 19:37:16 GMT
Author: tmill
Date: Mon Feb  1 19:37:16 2016
New Revision: 1727990

URL: http://svn.apache.org/viewvc?rev=1727990&view=rev
Log:
Uima-fitized old coreference pipeline.

Added:
    ctakes/trunk/ctakes-coreference-res/src/main/resources/org/apache/ctakes/coreference/cogVeds.txt
    ctakes/trunk/ctakes-coreference-res/src/main/resources/org/apache/ctakes/coreference/modalAdjs.txt
    ctakes/trunk/ctakes-coreference-res/src/main/resources/org/apache/ctakes/coreference/otherVerbs.txt
    ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/util/CoreferencePipelineFactory.java
Modified:
    ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/MipacqMarkableCreator.java
    ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/MipacqMarkableExpander.java
    ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/MipacqMarkablePairGenerator.java
    ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/MipacqSvmChainCreator.java
    ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/util/AbstractClassifier.java
    ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/util/AnnotationSelector.java

Added: ctakes/trunk/ctakes-coreference-res/src/main/resources/org/apache/ctakes/coreference/cogVeds.txt
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-coreference-res/src/main/resources/org/apache/ctakes/coreference/cogVeds.txt?rev=1727990&view=auto
==============================================================================
--- ctakes/trunk/ctakes-coreference-res/src/main/resources/org/apache/ctakes/coreference/cogVeds.txt
(added)
+++ ctakes/trunk/ctakes-coreference-res/src/main/resources/org/apache/ctakes/coreference/cogVeds.txt
Mon Feb  1 19:37:16 2016
@@ -0,0 +1,8 @@
+recommended
+thought
+believed
+known
+anticipated
+assumed
+expected
+noted
\ No newline at end of file

Added: ctakes/trunk/ctakes-coreference-res/src/main/resources/org/apache/ctakes/coreference/modalAdjs.txt
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-coreference-res/src/main/resources/org/apache/ctakes/coreference/modalAdjs.txt?rev=1727990&view=auto
==============================================================================
--- ctakes/trunk/ctakes-coreference-res/src/main/resources/org/apache/ctakes/coreference/modalAdjs.txt
(added)
+++ ctakes/trunk/ctakes-coreference-res/src/main/resources/org/apache/ctakes/coreference/modalAdjs.txt
Mon Feb  1 19:37:16 2016
@@ -0,0 +1,16 @@
+necessary
+good
+economical
+possible
+useful
+easy
+certain
+advisable
+desirable
+likely
+convenient
+difficult
+important
+sufficient
+legal
+worthwhile
\ No newline at end of file

Added: ctakes/trunk/ctakes-coreference-res/src/main/resources/org/apache/ctakes/coreference/otherVerbs.txt
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-coreference-res/src/main/resources/org/apache/ctakes/coreference/otherVerbs.txt?rev=1727990&view=auto
==============================================================================
--- ctakes/trunk/ctakes-coreference-res/src/main/resources/org/apache/ctakes/coreference/otherVerbs.txt
(added)
+++ ctakes/trunk/ctakes-coreference-res/src/main/resources/org/apache/ctakes/coreference/otherVerbs.txt
Mon Feb  1 19:37:16 2016
@@ -0,0 +1,4 @@
+seem
+appear
+mean
+follow
\ No newline at end of file

Modified: ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/MipacqMarkableCreator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/MipacqMarkableCreator.java?rev=1727990&r1=1727989&r2=1727990&view=diff
==============================================================================
--- ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/MipacqMarkableCreator.java
(original)
+++ ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/MipacqMarkableCreator.java
Mon Feb  1 19:37:16 2016
@@ -18,30 +18,45 @@
  */
 package org.apache.ctakes.coreference.ae;
 
+import java.io.File;
+import java.io.FileNotFoundException;
 import java.util.ArrayList;
 import java.util.HashSet;
+import java.util.Scanner;
+import java.util.Set;
 
+import org.apache.ctakes.coreference.type.DemMarkable;
+import org.apache.ctakes.coreference.type.NEMarkable;
+import org.apache.ctakes.coreference.type.PronounMarkable;
+import org.apache.ctakes.coreference.util.AnnotationSelector;
+import org.apache.ctakes.typesystem.type.syntax.Chunk;
+import org.apache.ctakes.typesystem.type.syntax.WordToken;
 import org.apache.log4j.Logger;
 import org.apache.uima.UimaContext;
-import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
+import org.apache.uima.fit.descriptor.ConfigurationParameter;
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.tcas.Annotation;
 import org.apache.uima.resource.ResourceInitializationException;
 
-import org.apache.ctakes.coreference.util.AnnotationSelector;
-import org.apache.ctakes.typesystem.type.syntax.Chunk;
-import org.apache.ctakes.typesystem.type.syntax.WordToken;
-import org.apache.ctakes.coreference.type.DemMarkable;
-import org.apache.ctakes.coreference.type.NEMarkable;
-import org.apache.ctakes.coreference.type.PronounMarkable;
-
 public class MipacqMarkableCreator extends JCasAnnotator_ImplBase {
 
 	public static int nextID = 0;
-	HashSet<String> modalAdj;
-	HashSet<String> cogved;
-	HashSet<String> otherVerb;
+	public static final String PARAM_MODAL_ADJ = "modalAdj";
+	@ConfigurationParameter(name = PARAM_MODAL_ADJ, mandatory=false, defaultValue="org/apache/ctakes/coreference/modalAdjs.txt")
+	File modalAdjFile = null;	
+	Set<String> modalAdj;
+	
+	public static final String PARAM_COGVED = "cogVeds";
+	@ConfigurationParameter(name = PARAM_COGVED, mandatory=false, defaultValue="org/apache/ctakes/coreference/cogVeds.txt")
+	File cogvedFile = null;
+	Set<String> cogved;
+	
+	public static final String PARAM_OTHER_VERB = "otherVerbs";
+	@ConfigurationParameter(name = PARAM_OTHER_VERB, mandatory=false, defaultValue="org/apache/ctakes/coreference/otherVerbs.txt")
+	File otherVerbFile=null;
+	Set<String> otherVerb;
 
 	// LOG4J logger based on class name
 	private Logger logger = Logger.getLogger(getClass().getName());
@@ -51,17 +66,26 @@ public class MipacqMarkableCreator exten
 		super.initialize(uc);
 
 		// Load modal adjectives and cognitive verbs for pleonastic patterns
-		String[] ma = (String[]) uc.getConfigParameterValue("modalAdj");
-		modalAdj = new HashSet<String>();
-		for (String s : ma) modalAdj.add(s);
-		String[] cv = (String[]) uc.getConfigParameterValue("cogved");
-		cogved = new HashSet<String>();
-		for (String s : cv) cogved.add(s);
-		String[] ov = (String[]) uc.getConfigParameterValue("otherVerb");
-		otherVerb = new HashSet<String>();
-		for (String s : ov) otherVerb.add(s);
+		try{
+		  modalAdj = readWordlistFile(modalAdjFile);
+		  cogved = readWordlistFile(cogvedFile);
+		  otherVerb = readWordlistFile(otherVerbFile);
+		}catch(FileNotFoundException e){
+		  throw new ResourceInitializationException(e);
+		}
 	}
 
+	private static final Set<String> readWordlistFile(File inputFile) throws FileNotFoundException{
+	  HashSet<String> words = new HashSet<>();
+	  try(Scanner scanner = new Scanner(inputFile)){
+	    while(scanner.hasNextLine()){
+	      String line = scanner.nextLine().trim();
+	      words.add(line);
+	    }
+	  }
+	  return words;
+	}
+	
 	@Override
 	public void process(JCas aJCas) throws AnalysisEngineProcessException {
 

Modified: ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/MipacqMarkableExpander.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/MipacqMarkableExpander.java?rev=1727990&r1=1727989&r2=1727990&view=diff
==============================================================================
--- ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/MipacqMarkableExpander.java
(original)
+++ ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/MipacqMarkableExpander.java
Mon Feb  1 19:37:16 2016
@@ -24,21 +24,18 @@ import java.util.HashSet;
 import java.util.LinkedList;
 import java.util.Map;
 
-import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
+import org.apache.ctakes.coreference.type.DemMarkable;
+import org.apache.ctakes.coreference.type.Markable;
+import org.apache.ctakes.coreference.type.NEMarkable;
+import org.apache.ctakes.coreference.util.FSIteratorToList;
+import org.apache.ctakes.coreference.util.MarkableTreeUtils;
+import org.apache.ctakes.typesystem.type.syntax.TreebankNode;
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
 import org.apache.uima.cas.FSIterator;
+import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.tcas.Annotation;
 
-import org.apache.ctakes.coreference.eval.helpers.Span;
-import org.apache.ctakes.coreference.util.FSIteratorToList;
-import org.apache.ctakes.coreference.util.MarkableTreeUtils;
-import org.apache.ctakes.typesystem.type.syntax.Chunk;
-import org.apache.ctakes.typesystem.type.syntax.TreebankNode;
-import org.apache.ctakes.coreference.type.DemMarkable;
-import org.apache.ctakes.coreference.type.Markable;
-import org.apache.ctakes.coreference.type.NEMarkable;
-
 public class MipacqMarkableExpander extends JCasAnnotator_ImplBase {
 
 	@Override
@@ -50,12 +47,13 @@ public class MipacqMarkableExpander exte
 		mergeNP(aJCas);
 		elevateAdjectives(aJCas);
 		iter = aJCas.getJFSIndexRepository().getAnnotationIndex(Markable.type).iterator();
-		rmDup(aJCas, FSIteratorToList.convert(iter));
+		rmDup(FSIteratorToList.convert(iter));
 	}
 
-	private void removeDoctors(JCas jCas) {
+	/*
+	private static void removeDoctors(JCas jCas) {
 		FSIterator<Annotation> iter = jCas.getAnnotationIndex(NEMarkable.type).iterator();
-		ArrayList<Annotation> rm = new ArrayList<Annotation>();
+		ArrayList<Annotation> rm = new ArrayList<>();
 		while(iter.hasNext()){
 			NEMarkable m = (NEMarkable) iter.next();
 			if(m.getCoveredText().equalsIgnoreCase("dr")){
@@ -65,11 +63,11 @@ public class MipacqMarkableExpander exte
 		for(Annotation a: rm){
 			a.removeFromIndexes();
 		}
-	}
+	}*/
 
-	private void removeHistoryOf(JCas jCas) {
+	private static void removeHistoryOf(JCas jCas) {
 		FSIterator<Annotation> iter = jCas.getAnnotationIndex(NEMarkable.type).iterator();
-		ArrayList<Annotation> rm = new ArrayList<Annotation>();
+		ArrayList<Annotation> rm = new ArrayList<>();
 		while(iter.hasNext()){
 			NEMarkable m = (NEMarkable) iter.next();
 			if(m.getCoveredText().equalsIgnoreCase("history of")){
@@ -81,10 +79,10 @@ public class MipacqMarkableExpander exte
 		}
 	}
 
-	private void expandToNP (JCas aJCas, LinkedList<Annotation> markables) {
+	private static void expandToNP (JCas aJCas, LinkedList<Annotation> markables) {
 //		FSIterator<Annotation> iter = aJCas.getJFSIndexRepository().getAnnotationIndex(LookupWindowAnnotation.type).iterator();
-		FSIterator<Annotation> iter = aJCas.getAnnotationIndex(TreebankNode.type).iterator();
-		LinkedList<Annotation> l = FSIteratorToList.convert(iter);
+//		FSIterator<Annotation> iter = aJCas.getAnnotationIndex(TreebankNode.type).iterator();
+//		LinkedList<Annotation> l = FSIteratorToList.convert(iter);
 
 		for (Annotation m : markables){
 			TreebankNode node = MarkableTreeUtils.markableNode(aJCas, m.getBegin(), m.getEnd());
@@ -116,7 +114,8 @@ public class MipacqMarkableExpander exte
 
 	// are any of the named entities contained within this chunk?
 	// if so return the first that is.
-	private Annotation containsAny (Chunk c, LinkedList<Annotation> l) {
+	/*
+	private static Annotation containsAny (Chunk c, LinkedList<Annotation> l) {
 		int a = c.getBegin();
 		int b = c.getEnd();
 		for (Annotation ne : l)
@@ -126,19 +125,20 @@ public class MipacqMarkableExpander exte
 				return null;
 		return null;
 	}
+	*/
 
 	// merge NP# -> NP' PP, where NP' is marked as a Markable, by making NP# a markable 

-	private void mergeNP (JCas jcas) {
+	private static void mergeNP (JCas jcas) {
 		Map<Integer,TreebankNode> innerMap = null;
 		// mark the boundaries of every NP:
 		FSIterator<Annotation> nodeIter = jcas.getAnnotationIndex(TreebankNode.type).iterator();
-		HashMap<Integer,Map<Integer,TreebankNode>> npMap = new HashMap<Integer,Map<Integer,TreebankNode>>();
+		HashMap<Integer,Map<Integer,TreebankNode>> npMap = new HashMap<>();
 		while(nodeIter.hasNext()){
 			TreebankNode node = (TreebankNode) nodeIter.next();
 			if(node.getNodeType().equals("NP")){
 				innerMap = npMap.get(node.getBegin());
 				if(innerMap == null){
-					innerMap = new HashMap<Integer,TreebankNode>();
+					innerMap = new HashMap<>();
 				}
 				innerMap.put(node.getEnd(), node);
 				npMap.put(node.getBegin(), innerMap);
@@ -168,7 +168,7 @@ public class MipacqMarkableExpander exte
 	 * 	surgical procedures
 	 */
 
-	private void elevateAdjectives(JCas jcas){
+	private static void elevateAdjectives(JCas jcas){
 		FSIterator<Annotation> markables = jcas.getAnnotationIndex(NEMarkable.type).iterator();
 		while(markables.hasNext()){
 			NEMarkable mark = (NEMarkable) markables.next();
@@ -187,9 +187,9 @@ public class MipacqMarkableExpander exte
 			
 	}
 
-	private void rmDup(JCas aJCas, LinkedList<Annotation> markables) {
-		HashSet<Annotation> rm = new HashSet<Annotation>();
-		HashMap<String,Annotation> keep = new HashMap<String,Annotation>();
+	private static void rmDup(LinkedList<Annotation> markables) {
+		HashSet<Annotation> rm = new HashSet<>();
+		HashMap<String,Annotation> keep = new HashMap<>();
 		
 		for (int i = 0; i < markables.size(); i++) {
 			Annotation m1 = markables.get(i);

Modified: ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/MipacqMarkablePairGenerator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/MipacqMarkablePairGenerator.java?rev=1727990&r1=1727989&r2=1727990&view=diff
==============================================================================
--- ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/MipacqMarkablePairGenerator.java
(original)
+++ ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/MipacqMarkablePairGenerator.java
Mon Feb  1 19:37:16 2016
@@ -22,62 +22,39 @@ import java.io.BufferedReader;
 import java.io.File;
 import java.io.FileReader;
 import java.util.HashSet;
-import java.util.Hashtable;
 import java.util.LinkedList;
-import java.util.Vector;
 
-import org.apache.log4j.Logger;
-import org.apache.uima.UimaContext;
-import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
-import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
-import org.apache.uima.jcas.JCas;
-import org.apache.uima.jcas.cas.EmptyFSList;
-import org.apache.uima.jcas.cas.FSList;
-import org.apache.uima.jcas.cas.NonEmptyFSList;
-import org.apache.uima.jcas.cas.NonEmptyFloatList;
-import org.apache.uima.jcas.tcas.Annotation;
 import org.apache.ctakes.coreference.type.BooleanLabeledFS;
-
-
-import org.apache.ctakes.core.resource.FileResource;
-import org.apache.ctakes.core.util.DocumentIDAnnotationUtil;
-import org.apache.ctakes.coreference.eval.helpers.Span;
-import org.apache.ctakes.coreference.eval.helpers.SpanAlignment;
-import org.apache.ctakes.coreference.eval.helpers.SpanOffsetComparator;
+import org.apache.ctakes.coreference.type.DemMarkable;
+import org.apache.ctakes.coreference.type.Markable;
+import org.apache.ctakes.coreference.type.MarkablePairSet;
+import org.apache.ctakes.coreference.type.NEMarkable;
+import org.apache.ctakes.coreference.type.PronounMarkable;
 import org.apache.ctakes.coreference.util.CorefConsts;
 import org.apache.ctakes.coreference.util.FSIteratorToList;
-import org.apache.ctakes.coreference.util.MarkableTreeUtils;
 import org.apache.ctakes.coreference.util.PairAttributeCalculator;
-import org.apache.ctakes.coreference.util.ParentPtrTree;
 import org.apache.ctakes.typesystem.type.syntax.BaseToken;
 import org.apache.ctakes.typesystem.type.syntax.Chunk;
 import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
-import org.apache.ctakes.coreference.type.DemMarkable;
-import org.apache.ctakes.coreference.type.Markable;
-import org.apache.ctakes.coreference.type.MarkablePair;
-import org.apache.ctakes.coreference.type.MarkablePairSet;
-import org.apache.ctakes.coreference.type.NEMarkable;
-import org.apache.ctakes.coreference.type.PronounMarkable;
+import org.apache.log4j.Logger;
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
+import org.apache.uima.fit.descriptor.ConfigurationParameter;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.cas.EmptyFSList;
+import org.apache.uima.jcas.cas.NonEmptyFSList;
+import org.apache.uima.jcas.tcas.Annotation;
 
 public class MipacqMarkablePairGenerator extends JCasAnnotator_ImplBase {
 
+  public static final String PARAM_STOPWORDS_FILE = "StopFile";
+  @ConfigurationParameter(name=PARAM_STOPWORDS_FILE, mandatory=false, defaultValue="org/apache/ctakes/coreference/models/stop.txt")
+  File stopwordFile = null;
+  HashSet<String> stopwords;
+  
 	// LOG4J logger based on class name
 	private Logger logger = Logger.getLogger(getClass().getName());
-	private int maxSpanID = 0;
-	HashSet<String> stopwords;
-//	ParentPtrTree ppt;
-//	
-//	Vector<Span> goldSpans = null;
-//	Hashtable<String,Integer> goldSpan2id = null;
-//	Vector<int[]> goldPairs = null;
-//	
-//	Vector<Span> sysSpans = null;
-//	Hashtable<String,Integer> sysSpan2id = null;
-//	Vector<int[]> sysPairs = null;
-//	Hashtable<Integer, Integer> sysId2AlignId = null;
-//	Hashtable<Integer, Integer> goldId2AlignId = null;
-//	Hashtable<Integer, Integer> alignId2GoldId = null;
-//	int[] goldEqvCls;
 	int numVecs = 0;
 	
 	@Override
@@ -86,20 +63,20 @@ public class MipacqMarkablePairGenerator
 		
 		// Load stop words list
 		try {
-			stopwords = new HashSet<String>();
-			FileResource r = (FileResource) uc.getResourceObject("stopWords");
-			BufferedReader br = new BufferedReader(new FileReader(r.getFile()));
-			String l;
-			while ((l = br.readLine())!=null) {
-				l = l.trim();
-				if (l.length()==0) continue;
-				int i = l.indexOf('|');
-				if (i > 0)
-					stopwords.add(l.substring(0,i).trim());
-				else if (i < 0)
-					stopwords.add(l.trim());
+			stopwords = new HashSet<>();
+			try(BufferedReader br = new BufferedReader(new FileReader(stopwordFile))){
+			  String l;
+			  while ((l = br.readLine())!=null) {
+			    l = l.trim();
+			    if (l.length()==0) continue;
+			    int i = l.indexOf('|');
+			    if (i > 0)
+			      stopwords.add(l.substring(0,i).trim());
+			    else if (i < 0)
+			      stopwords.add(l.trim());
+			  }
 			}
-			logger.info("Stop words list loaded: " + r.getFile().getAbsolutePath());
+			logger.info("Stop words list loaded: " + stopwordFile.getAbsolutePath());
 		} catch (Exception e) {
 			e.printStackTrace();
 			logger.error("Error loading stop words list");
@@ -111,33 +88,12 @@ public class MipacqMarkablePairGenerator
 	public void process(JCas jcas) throws AnalysisEngineProcessException {
 		// read the gold standard
 		numVecs = 0;
-//		sysId2AlignId = new Hashtable<Integer, Integer>();
-//		goldId2AlignId = new Hashtable<Integer, Integer>();
-//		alignId2GoldId = new Hashtable<Integer, Integer>();
-		String docName = DocumentIDAnnotationUtil.getDocumentID(jcas);
-//		if (docName==null) docName = "141471681_1";
-//		System.out.print("creating vectors for "+docName);
-//		loadGoldStandard(docName);
-//		else loadGoldStandard();
+//		String docName = DocumentIDAnnotationUtil.getDocumentID(jcas);
 
 		// Convert the orderless FSIterator to List, sort by char offsets
 		LinkedList<Annotation> lm = FSIteratorToList.convert(
 				jcas.getJFSIndexRepository().getAnnotationIndex(Markable.type).iterator());
 		
-//		loadSystemPairs(lm);
-//		// align the spans
-//		SpanAlignment sa = new SpanAlignment(goldSpans.toArray(new Span[goldSpans.size()]),
-//				sysSpans.toArray(new Span[sysSpans.size()]));
-//
-//		int[] id = sa.get1();
-//		for (int i = 0; i < id.length; i++){
-//			alignId2GoldId.put(id[i]+maxSpanID, goldSpan2id.get(goldSpans.get(i).toString()));
-//			goldId2AlignId.put(goldSpan2id.get(goldSpans.get(i).toString()), id[i] + maxSpanID);
-//		}
-//		id = sa.get2();
-//		for (int i = 0; i < id.length; i++){
-//			sysId2AlignId.put(sysSpan2id.get(sysSpans.get(i).toString()), id[i]+maxSpanID);
-//		}
 		// now iterate over system markables and add the ones that match gold standard as
 		// true, otherwise false
 		for (int p = 1; p < lm.size(); ++p) {
@@ -218,13 +174,6 @@ public class MipacqMarkablePairGenerator
 				tail = (NonEmptyFSList) tail.getTail();
 			}
 			tail.setHead(labeledAntecedent);
-//			if (isGoldPair(a, m)){
-//				labeledAntecedent.setLabel(true);
-//				// FIXME this cannot be done, it's implicitly looking at the label and changing the possible outcomes...
-//				break; // stop if a gold pair is found
-//			}else{
-//				labeledAntecedent.setLabel(false);
-//			}
 		}
 		if(tail == null) pairList.setAntecedentList(new EmptyFSList(jcas));
 		else tail.setTail(new EmptyFSList(jcas));
@@ -254,13 +203,6 @@ public class MipacqMarkablePairGenerator
 				tail = (NonEmptyFSList) tail.getTail();
 			}
 			tail.setHead(labeledAntecedent);
-//			if (isGoldPair(a, m)){
-//				// FIXME
-//				labeledAntecedent.setLabel(true);
-//				break; // stop if a gold pair is found
-//			}else{
-//				labeledAntecedent.setLabel(false);
-//			}
 		}
 		if(tail == null) pairList.setAntecedentList(new EmptyFSList(jcas));
 		else tail.setTail(new EmptyFSList(jcas));
@@ -294,13 +236,6 @@ public class MipacqMarkablePairGenerator
 				tail = (NonEmptyFSList) tail.getTail();
 			}
 			tail.setHead(labeledAntecedent);
-//			if (isGoldPair(a, m)){
-//				// FIXME
-//				labeledAntecedent.setLabel(true);
-//				break; // stop if a gold pair is found
-//			}else{
-//				labeledAntecedent.setLabel(false);
-//			}
 		}
 		if(tail == null) pairList.setAntecedentList(new EmptyFSList(jcas));
 		else tail.setTail(new EmptyFSList(jcas));

Modified: ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/MipacqSvmChainCreator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/MipacqSvmChainCreator.java?rev=1727990&r1=1727989&r2=1727990&view=diff
==============================================================================
--- ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/MipacqSvmChainCreator.java
(original)
+++ ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/MipacqSvmChainCreator.java
Mon Feb  1 19:37:16 2016
@@ -18,6 +18,7 @@
  */
 package org.apache.ctakes.coreference.ae;
 
+import java.io.File;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.HashSet;
@@ -26,11 +27,6 @@ import java.util.List;
 import java.util.Map;
 import java.util.Scanner;
 
-import libsvm.svm;
-import libsvm.svm_model;
-import libsvm.svm_node;
-
-import org.apache.ctakes.core.resource.FileResource;
 import org.apache.ctakes.core.util.DocumentIDAnnotationUtil;
 import org.apache.ctakes.coreference.type.BooleanLabeledFS;
 import org.apache.ctakes.coreference.type.DemMarkable;
@@ -49,12 +45,12 @@ import org.apache.ctakes.typesystem.type
 import org.apache.ctakes.typesystem.type.relation.CoreferenceRelation;
 import org.apache.ctakes.typesystem.type.relation.RelationArgument;
 import org.apache.ctakes.typesystem.type.syntax.TreebankNode;
-import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
 import org.apache.log4j.Logger;
 import org.apache.uima.UimaContext;
-import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
 import org.apache.uima.cas.FSIterator;
+import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
+import org.apache.uima.fit.descriptor.ConfigurationParameter;
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.cas.EmptyFSList;
 import org.apache.uima.jcas.cas.FSList;
@@ -62,14 +58,27 @@ import org.apache.uima.jcas.cas.NonEmpty
 import org.apache.uima.jcas.tcas.Annotation;
 import org.apache.uima.resource.ResourceInitializationException;
 
+import libsvm.svm_node;
+
 public class MipacqSvmChainCreator extends JCasAnnotator_ImplBase {
 
+  public static final String PARAM_STOPWORDS_FILENAME = "StopWords";
+  @ConfigurationParameter(name = PARAM_STOPWORDS_FILENAME, mandatory=false, defaultValue="org/apache/ctakes/coreference/models/stop.txt")
+  File stopwordFile = null;
+  HashSet<String> stopwords;
+
+  public static final String PARAM_FRAGS_FILENAME = "FragsFile";
+  @ConfigurationParameter(name = PARAM_FRAGS_FILENAME, mandatory=false, defaultValue="org/apache/ctakes/coreference/models/frags.txt")
+  File treefragFile = null;
+  private ArrayList<String> treeFrags;
+  
+  public static final String PARAM_COREF_MODEL = "ModelFile";
+  @ConfigurationParameter(name = PARAM_COREF_MODEL, mandatory=false, defaultValue="org/apache/ctakes/coreference/models/ne.mayo.rbf.model")
+  File modelFile = null;
+  
 	// LOG4J logger based on class name
 	private Logger logger = Logger.getLogger(getClass().getName());
 
-	// debug
-	private boolean debug = false;
-
 	// svm models
 //	private AbstractClassifier mod_pron, mod_dem, mod_coref;
 	private AbstractClassifier mod_coref;
@@ -80,24 +89,6 @@ public class MipacqSvmChainCreator exten
 //	private int coref_idx;
 	private SvmVectorCreator vecCreator = null;
 	
-//	ParentPtrTree ppt;
-
-	HashSet<String> stopwords;
-	private ArrayList<String> treeFrags;
-
-/*	private svm_model loadModel (UimaContext uc, String m) {
-		svm_model ret = null;
-		try {
-			String r = ((FileResource) uc.getResourceObject(m)).getFile().getAbsolutePath();
-			ret = svm.svm_load_model(r);
-			logger.info(m+" loaded: "+r);
-		} catch (Exception e) {
-			e.printStackTrace();
-			logger.error("Error loading "+m);
-		}
-		return ret;
-	}
-*/
 	@Override
 	public void initialize(UimaContext uc) throws ResourceInitializationException {
 		super.initialize(uc);
@@ -107,43 +98,35 @@ public class MipacqSvmChainCreator exten
 		// FIXME why is there a minus one here?
 //		mod_pron = new AbstractClassifier(uc, "svmPronModel", FeatureVector.getPronCorefFeatures().length + SyntaxAttributeCalculator.getNumPronFeats() - 1);
 //		mod_dem = new AbstractClassifier(uc, "svmDemModel", FeatureVector.getDemCorefFeatures().length + SyntaxAttributeCalculator.getNumDemFeats() - 1);
-		mod_coref = new AbstractClassifier(uc, "svmCorefModel", FeatureVector.getNECorefFeatures().length + SyntaxAttributeCalculator.getNumNEFeats() - 1);
-
-//		int[] labels = new int[2];
-//		svm.svm_get_labels(mod_anaphoricity, labels);
-//		anaphoricity_idx = labels[0]==1 ? 0 : 1;
-		//		svm.svm_get_labels(mod_coref, labels);
-		//		coref_idx = labels[0]==1 ? 0 : 1;
+		mod_coref = new AbstractClassifier(modelFile, FeatureVector.getNECorefFeatures().length + SyntaxAttributeCalculator.getNumNEFeats() - 1);
 
 		// Load stop words list
 		try {
-			stopwords = new HashSet<String>();
-			FileResource r = (FileResource) uc.getResourceObject("stopWords");
-			Scanner scanner = new Scanner(r.getFile());
-			String l;
-			while (scanner.hasNextLine()) {
-				l = scanner.nextLine().trim();
-				if (l.length()==0) continue;
-				int i = l.indexOf('|');
-				if (i > 0)
-					stopwords.add(l.substring(0,i).trim());
-				else if (i < 0)
-					stopwords.add(l.trim());
+			stopwords = new HashSet<>();
+			try(Scanner scanner = new Scanner(stopwordFile)){
+			  String l;
+			  while (scanner.hasNextLine()) {
+			    l = scanner.nextLine().trim();
+			    if (l.length()==0) continue;
+			    int i = l.indexOf('|');
+			    if (i > 0)
+			      stopwords.add(l.substring(0,i).trim());
+			    else if (i < 0)
+			      stopwords.add(l.trim());
+			  }
+			  logger.info("Stop words list loaded: " + stopwordFile.getAbsolutePath());
+			  vecCreator = new SvmVectorCreator(stopwords);
 			}
-			logger.info("Stop words list loaded: " + r.getFile().getAbsolutePath());
-			vecCreator = new SvmVectorCreator(stopwords);
+      treeFrags = new ArrayList<>();
+			try(Scanner scanner = new Scanner(treefragFile)){
 
-			treeFrags = new ArrayList<String>();
-			r = (FileResource) uc.getResourceObject("frags");
-			if(r != null){
-				scanner = new Scanner(r.getFile());
-				while(scanner.hasNextLine()){
-					String line = scanner.nextLine();
-					treeFrags.add(line.split(" ")[1]);
-				}
-				vecCreator.setFrags(treeFrags);
+			  while(scanner.hasNextLine()){
+			    String line = scanner.nextLine();
+			    treeFrags.add(line.split(" ")[1]);
+			  }
+			  vecCreator.setFrags(treeFrags);
+			  logger.info("Tree fragment features loaded: " + treefragFile.getAbsolutePath());
 			}
-			logger.info("Tree fragment features loaded: " + r.getFile().getAbsolutePath());
 		} catch (Exception e) {
 			e.printStackTrace();
 			logger.error("Error loading stop words list");
@@ -155,7 +138,7 @@ public class MipacqSvmChainCreator exten
 		// Convert the orderless FSIterator to List, sort by char offsets
 		LinkedList<Annotation> lm = FSIteratorToList.convert(
 				jcas.getJFSIndexRepository().getAnnotationIndex(Markable.type).iterator());
-		Map<Markable, NonEmptyFSList> collectionRas = new HashMap<Markable, NonEmptyFSList>();
+		Map<Markable, NonEmptyFSList> collectionRas = new HashMap<>();
 		String docName = DocumentIDAnnotationUtil.getDocumentID(jcas);
 		logger.info("Classifying coreference in document: " + docName);
 //		ArrayList<CollectionTextRelation> chains = new ArrayList<CollectionTextRelation>();
@@ -166,7 +149,7 @@ public class MipacqSvmChainCreator exten
 //		ppt = new ParentPtrTree(lm.size());
 
 		// Make a data structure mapping markables to indexes so we don't lose the order if we re-arrange
-		Map<Markable, Integer> m2q = new HashMap<Markable,Integer>();
+		Map<Markable, Integer> m2q = new HashMap<>();
 		
 		for(int p = 0; p < lm.size(); p++){
 			m2q.put((Markable)lm.get(p), p);
@@ -176,7 +159,7 @@ public class MipacqSvmChainCreator exten
 		while(iter.hasNext()){
 			MarkablePairSet set = (MarkablePairSet) iter.next();
 			Markable anaphor = set.getAnaphor();
-			FSList fs = (FSList) set.getAntecedentList();
+			FSList fs = set.getAntecedentList();
 			MarkableProb bestAnte = null;
 			LinkedList<Markable> ll = fs2ll(fs);
 			if(anaphor instanceof PronounMarkable){
@@ -243,17 +226,7 @@ public class MipacqSvmChainCreator exten
 					chain.setMembers(anteNode);
 					chain.addToIndexes();
 				}
-				anteNode.setTail(node);
-				
-				
-//				ppt.union(m2q.get(anaphor), m2q.get(bestAnte.m));
-				if(anaphor instanceof PronounMarkable){
-					// if the anaphor is a pronoun then it won't be in the cas as an identifiedannotation so we need to add it.
-					IdentifiedAnnotation ia = new IdentifiedAnnotation(jcas);
-					// TODO
-				}
-			}else{
-//				indexNegativeExample(jcas, bestAnte.m, anaphor, bestAnte.prob);
+				anteNode.setTail(node);				
 			}
 		}
 		logger.info("Done classifying document: " + docName);
@@ -300,8 +273,8 @@ public class MipacqSvmChainCreator exten
 	}
 
 
-	private LinkedList<Markable> fs2ll(FSList fs) {
-		LinkedList<Markable> ll = new LinkedList<Markable>();
+	private static LinkedList<Markable> fs2ll(FSList fs) {
+		LinkedList<Markable> ll = new LinkedList<>();
 		while(fs instanceof NonEmptyFSList){
 			NonEmptyFSList node = (NonEmptyFSList) fs;
 			BooleanLabeledFS feat = (BooleanLabeledFS) node.getHead();
@@ -311,7 +284,8 @@ public class MipacqSvmChainCreator exten
 		}
 		return ll;
 	}
-	
+
+	/*
 	private MarkableProb processPronoun(Markable anaphor, LinkedList<Markable> anteList, JCas jcas){
 		Markable ante = null;
 		double bestProb = 0.0;
@@ -334,7 +308,7 @@ public class MipacqSvmChainCreator exten
 		}
 		return new MarkableProb(ante, bestProb);
 	}
-
+*/
 	private MarkableProb processNE(Markable anaphor, List<Markable> anteList, JCas jcas){
 		Markable ante = null;
 		double bestProb = 0.0;
@@ -353,12 +327,15 @@ public class MipacqSvmChainCreator exten
 		return new MarkableProb(ante, bestProb);
 	}
 	
-	private MarkableProb processNELazily(Markable anaphor, List<Markable> anteList, JCas jcas){
+	/*
+	private static MarkableProb processNELazily(Markable anaphor, List<Markable> anteList, JCas jcas){
 		if(anteList.size() > 0) return new MarkableProb(anteList.get(0), 1.0);
-		else return new MarkableProb(null,0.0);
+		
+		return new MarkableProb(null,0.0);
 	}
+	*/
 	
-	private MarkableProb processDem(Markable anaphor, List<Markable> anteList, JCas jcas){
+	private static MarkableProb processDem(Markable anaphor, List<Markable> anteList, JCas jcas){
 		double bestProb = 0.0;
 		TreebankNode n = MarkableTreeUtils.markableNode(jcas, anaphor.getBegin(), anaphor.getEnd());
 		TreebankNode parent = (n != null ? n.getParent() : null);

Modified: ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/util/AbstractClassifier.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/util/AbstractClassifier.java?rev=1727990&r1=1727989&r2=1727990&view=diff
==============================================================================
--- ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/util/AbstractClassifier.java	(original)
+++ ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/util/AbstractClassifier.java	Mon Feb  1 19:37:16 2016
@@ -36,19 +36,14 @@ public class AbstractClassifier {
 	private svm_model svmCls = null;
 	private int clsIndex = -1;
 
-	public AbstractClassifier(UimaContext uc, String key, int len) {
+	public AbstractClassifier(File fn, int len) {
 		try{
-			File file = ((FileResource)uc.getResourceObject(key)).getFile();
-			String fn = file.getAbsolutePath();
-			svmCls = svm.svm_load_model(fn);
+			svmCls = svm.svm_load_model(fn.getAbsolutePath());
 			int[] labels = new int[2];
 			svm.svm_get_labels(svmCls, labels);
 			clsIndex = labels[0]==1 ? 0 : 1;
 		}catch(IOException e){
 			e.printStackTrace();
-		} catch (ResourceAccessException e) {
-			// TODO Auto-generated catch block
-			e.printStackTrace();
 		}
 	}
 

Modified: ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/util/AnnotationSelector.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/util/AnnotationSelector.java?rev=1727990&r1=1727989&r2=1727990&view=diff
==============================================================================
--- ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/util/AnnotationSelector.java	(original)
+++ ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/util/AnnotationSelector.java	Mon Feb  1 19:37:16 2016
@@ -22,12 +22,7 @@ import java.util.ArrayList;
 import java.util.HashSet;
 import java.util.Hashtable;
 //import java.util.Iterator;
-
-import org.apache.log4j.Logger;
-import org.apache.uima.cas.FSIterator;
-import org.apache.uima.jcas.JCas;
-import org.apache.uima.jcas.cas.FSArray;
-import org.apache.uima.jcas.tcas.Annotation;
+import java.util.Set;
 
 import org.apache.ctakes.typesystem.type.syntax.BaseToken;
 import org.apache.ctakes.typesystem.type.syntax.Chunk;
@@ -38,6 +33,11 @@ import org.apache.ctakes.typesystem.type
 import org.apache.ctakes.typesystem.type.textsem.EventMention;
 import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
 import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.log4j.Logger;
+import org.apache.uima.cas.FSIterator;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.cas.FSArray;
+import org.apache.uima.jcas.tcas.Annotation;
 
 // TODO: This class hardcoded all the criteria,
 // which should be replaced by a parser of
@@ -83,7 +83,7 @@ public class AnnotationSelector {
 	}
 
 	public static ArrayList<WordToken> selectPronoun (JCas jcas,
-			HashSet<String> modalAdj, HashSet<String> cogved, HashSet<String> othervb,
+			Set<String> modalAdj, Set<String> cogved, Set<String> othervb,
 			Logger logger) {
 		Hashtable<String, WordToken> offset2token = new Hashtable<String, WordToken>();
 		ArrayList<WordToken> ret = new ArrayList<WordToken>();
@@ -118,7 +118,7 @@ public class AnnotationSelector {
 	}
 
 	private static boolean isPleonastic (TerminalTreebankNode ttn,
-			HashSet<String> modalAdj, HashSet<String> cogved, HashSet<String> othervb) {
+			Set<String> modalAdj, Set<String> cogved, Set<String> othervb) {
 		if (!ttn.getCoveredText().equalsIgnoreCase("it")) return false;
 
 		if (ttn.getNodeType().equals("PRP")) {

Added: ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/util/CoreferencePipelineFactory.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/util/CoreferencePipelineFactory.java?rev=1727990&view=auto
==============================================================================
--- ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/util/CoreferencePipelineFactory.java	(added)
+++ ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/util/CoreferencePipelineFactory.java	Mon Feb  1 19:37:16 2016
@@ -0,0 +1,24 @@
+package org.apache.ctakes.coreference.util;
+
+import org.apache.ctakes.coreference.ae.MipacqMarkableCreator;
+import org.apache.ctakes.coreference.ae.MipacqMarkableExpander;
+import org.apache.ctakes.coreference.ae.MipacqMarkablePairGenerator;
+import org.apache.ctakes.coreference.ae.MipacqSvmChainCreator;
+import org.apache.uima.analysis_engine.AnalysisEngineDescription;
+import org.apache.uima.fit.factory.AggregateBuilder;
+import org.apache.uima.fit.factory.AnalysisEngineFactory;
+import org.apache.uima.resource.ResourceInitializationException;
+
+public class CoreferencePipelineFactory {
+  
+  public static AnalysisEngineDescription getCoreferencePipeline() throws ResourceInitializationException{
+    AggregateBuilder builder = new AggregateBuilder();
+    
+    builder.add(AnalysisEngineFactory.createEngineDescription(MipacqMarkableCreator.class));
+    builder.add(AnalysisEngineFactory.createEngineDescription(MipacqMarkableExpander.class));
+    builder.add(AnalysisEngineFactory.createEngineDescription(MipacqMarkablePairGenerator.class));
+    builder.add(AnalysisEngineFactory.createEngineDescription(MipacqSvmChainCreator.class));
+
+    return builder.createAggregateDescription();
+  }
+}




Mime
View raw message