incubator-ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From s..@apache.org
Subject svn commit: r1438147 - in /incubator/ctakes/trunk: ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/ ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/ ctakes-assertion/src/main/java/org/apache/ctakes/assertion/...
Date Thu, 24 Jan 2013 19:52:59 GMT
Author: swu
Date: Thu Jan 24 19:52:58 2013
New Revision: 1438147

URL: http://svn.apache.org/viewvc?rev=1438147&view=rev
Log:
the stuff it takes to get a subject attribute test running

Modified:
    incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvalBasedOnModifier.java
    incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/GenericCleartkAnalysisEngine.java
    incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/SubjectCleartkAnalysisEngine.java
    incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/pipelines/GoldEntityAndAttributeReaderPipelineForSeedCorpus.java
    incubator/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/SHARPKnowtatorXMLReader.java

Modified: incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvalBasedOnModifier.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvalBasedOnModifier.java?rev=1438147&r1=1438146&r2=1438147&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvalBasedOnModifier.java (original)
+++ incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvalBasedOnModifier.java Thu Jan 24 19:52:58 2013
@@ -65,6 +65,7 @@ import org.apache.ctakes.assertion.medfa
 import org.apache.ctakes.assertion.medfacts.cleartk.PolarityCleartkAnalysisEngine;
 import org.apache.ctakes.assertion.medfacts.cleartk.SubjectCleartkAnalysisEngine;
 import org.apache.ctakes.assertion.medfacts.cleartk.UncertaintyCleartkAnalysisEngine;
+import org.apache.ctakes.core.ae.DocumentIdPrinterAnalysisEngine;
 import org.apache.ctakes.core.util.DocumentIDAnnotationUtil;
 import org.uimafit.component.JCasAnnotator_ImplBase;
 import org.uimafit.component.xwriter.XWriter;
@@ -155,6 +156,11 @@ public class AssertionEvalBasedOnModifie
             required = false)
     public boolean runGeneric = true;
         
+    @Option(
+            name = "--cross-validation",
+            usage = "ignore the test set and run n-fold cross-validation. default: n=2",
+            required = false)
+    public Integer crossValidationFolds;
     
   }
   
@@ -175,14 +181,14 @@ public class AssertionEvalBasedOnModifie
     
 //    System.err.println("forcing skipping of subject processing!!!");
 //    options.runSubject = false;
-    System.err.println("forcing skipping of generic processing!!!");
-    options.runGeneric = false;
-    System.err.println("forcing skipping of polarity processing!!!");
-    options.runPolarity = false;
-    System.err.println("forcing skipping of uncertainty processing!!!");
-    options.runUncertainty = false;
-    System.err.println("forcing skipping of conditional processing!!!");
-    options.runConditional = false;
+//    System.err.println("forcing skipping of generic processing!!!");
+//    options.runGeneric = false;
+//    System.err.println("forcing skipping of polarity processing!!!");
+//    options.runPolarity = false;
+//    System.err.println("forcing skipping of uncertainty processing!!!");
+//    options.runUncertainty = false;
+//    System.err.println("forcing skipping of conditional processing!!!");
+//    options.runConditional = false;
     printOptionsForDebugging(options);
     List<File> trainFiles = Arrays.asList(options.trainDirectory.listFiles());
     //File modelsDir = new File("models/modifier");
@@ -239,9 +245,9 @@ public class AssertionEvalBasedOnModifie
     
     
     
-    if(options.testDirectory == null) {
+    if(options.testDirectory == null || options.crossValidationFolds != null) {
       // run n-fold cross-validation
-      List<Map<String, AnnotationStatistics>> foldStats = evaluation.crossValidation(trainFiles, 2);
+      List<Map<String, AnnotationStatistics>> foldStats = evaluation.crossValidation(trainFiles, options.crossValidationFolds);
       //AnnotationStatistics overallStats = AnnotationStatistics.addAll(foldStats);
       Map<String, AnnotationStatistics> overallStats = new TreeMap<String, AnnotationStatistics>();
       
@@ -284,6 +290,7 @@ public class AssertionEvalBasedOnModifie
 		"training dir: %s%n" +
 	    "test dir: %s%n" + 
 	    "model dir: %s%n" +
+	    "cross-validation: %d%n" +
 	    "run polarity: %b%n" +
 	    "run conditional: %b%n" +
 	    "run uncertainty: %b%n" +
@@ -293,6 +300,7 @@ public class AssertionEvalBasedOnModifie
 	    options.trainDirectory.getAbsolutePath(),
 	    options.testDirectory.getAbsolutePath(),
 	    options.modelsDirectory.getAbsolutePath(),
+	    options.crossValidationFolds,
 	    options.runPolarity,
 	    options.runConditional,
 	    options.runUncertainty,
@@ -365,6 +373,9 @@ public static void printScore(Map<String
 //        directory.getPath());
 //    builder.add(assertionDescription);
     
+    AnalysisEngineDescription documentIdPrinterAnnotator = AnalysisEngineFactory.createPrimitiveDescription(DocumentIdPrinterAnalysisEngine.class);
+    builder.add(documentIdPrinterAnnotator);
+    
     AnalysisEngineDescription goldCopierIdentifiedAnnotsAnnotator = AnalysisEngineFactory.createPrimitiveDescription(ReferenceIdentifiedAnnotationsSystemToGoldCopier.class);
     builder.add(goldCopierIdentifiedAnnotsAnnotator);
     

Modified: incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/GenericCleartkAnalysisEngine.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/GenericCleartkAnalysisEngine.java?rev=1438147&r1=1438146&r2=1438147&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/GenericCleartkAnalysisEngine.java (original)
+++ incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/GenericCleartkAnalysisEngine.java Thu Jan 24 19:52:58 2013
@@ -71,7 +71,7 @@ public class GenericCleartkAnalysisEngin
 	      } else
 	      {
 	        String label = this.classifier.classify(instance.getFeatures());
-	        entityMention.setSubject(label);
+	        entityMention.setGeneric("1".equals(label));
 	      }
 	}
 

Modified: incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/SubjectCleartkAnalysisEngine.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/SubjectCleartkAnalysisEngine.java?rev=1438147&r1=1438146&r2=1438147&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/SubjectCleartkAnalysisEngine.java (original)
+++ incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/SubjectCleartkAnalysisEngine.java Thu Jan 24 19:52:58 2013
@@ -82,16 +82,17 @@ public class SubjectCleartkAnalysisEngin
 	      {
 	        String subj = entityMention.getSubject();
 	        instance.setOutcome(subj);
-	        this.dataWriter.write(instance);
 	        logger.log(Level.INFO,  String.format("[%s] expected: ''; actual: ''; features: %s",
-	      		  this.getClass().getSimpleName(),
-	      		  instance.toString()
-	      		  //StringUtils.join(instance.getFeatures(), ", ")
-	      		  ));
+		      		  this.getClass().getSimpleName(),
+		      		  instance.toString()
+		      		  //StringUtils.join(instance.getFeatures(), ", ")
+		      		  ));
+	        this.dataWriter.write(instance);
 	      } else
 	      {
 	        String label = this.classifier.classify(instance.getFeatures());
 	        entityMention.setSubject(label);
+	        logger.log(Level.INFO, "SUBJECT is being set on an IdentifiedAnnotation: "+label+" "+entityMention.getSubject());
 	      }
 	}
 

Modified: incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/pipelines/GoldEntityAndAttributeReaderPipelineForSeedCorpus.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/pipelines/GoldEntityAndAttributeReaderPipelineForSeedCorpus.java?rev=1438147&r1=1438146&r2=1438147&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/pipelines/GoldEntityAndAttributeReaderPipelineForSeedCorpus.java (original)
+++ incubator/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/pipelines/GoldEntityAndAttributeReaderPipelineForSeedCorpus.java Thu Jan 24 19:52:58 2013
@@ -92,8 +92,14 @@ public class GoldEntityAndAttributeReade
 			
 			File knowtatorDirectory = new File(currentBatchDirectory, "Knowtator");
 			File textDirectory = new File(knowtatorDirectory, "text");
+			// train set uses this naming convention
 			File xmlDirectory = new File(currentBatchDirectory, "Knowtator_XML");
 			File xmiDirectory = new File(currentBatchDirectory, "Knowtator_XMI");
+			// dev and test sets use this naming convention
+			if (!xmlDirectory.exists()) {
+				xmlDirectory = new File(currentBatchDirectory, "Knowtator XML");
+				xmiDirectory = new File(currentBatchDirectory, "Knowtator XMI");
+			}
 			
 			if (!knowtatorDirectory.isDirectory() ||
 					!textDirectory.isDirectory() ||
@@ -139,6 +145,11 @@ public class GoldEntityAndAttributeReade
 					textDirectory.toString() + "/"
 			);
 			
+//			AnalysisEngineDescription sysAnnotator = (AnalysisEngineDescription) AnalysisEngineFactory.createAnalysisEngineFromPath(
+//					"/Users/m081914/work/sharpattr/ctakes/ctakes-clinical-pipeline" +
+//					"/desc/analysis_engine/AttributeClassifierPreprocessor.xml"
+//					);
+			
 	    AnalysisEngineDescription xWriter = AnalysisEngineFactory.createPrimitiveDescription(
 	        XWriter.class,
 	        typeSystemDescription,

Modified: incubator/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/SHARPKnowtatorXMLReader.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/SHARPKnowtatorXMLReader.java?rev=1438147&r1=1438146&r2=1438147&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/SHARPKnowtatorXMLReader.java (original)
+++ incubator/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/SHARPKnowtatorXMLReader.java Thu Jan 24 19:52:58 2013
@@ -171,7 +171,11 @@ public class SHARPKnowtatorXMLReader ext
     	fileSeparator = File.separator;
     }
     String xmlURI = textURI.replaceAll("Knowtator"+fileSeparator+"text", "Knowtator_XML") + ".knowtator.xml";
+    // check if directory structure doesn't have underscores
     try {
+    	if (!new File(new URI(xmlURI)).exists()) {
+    		xmlURI = textURI.replaceAll("Knowtator"+fileSeparator+"text", "Knowtator%20XML") + ".knowtator.xml";
+    	}
       return new URI(xmlURI);
     } catch (URISyntaxException e) {
       throw new AnalysisEngineProcessException(e);



Mime
View raw message