Return-Path: X-Original-To: apmail-ctakes-commits-archive@www.apache.org Delivered-To: apmail-ctakes-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 6B3A6C0D6 for ; Tue, 18 Jun 2013 19:30:40 +0000 (UTC) Received: (qmail 67665 invoked by uid 500); 18 Jun 2013 19:30:35 -0000 Delivered-To: apmail-ctakes-commits-archive@ctakes.apache.org Received: (qmail 67275 invoked by uid 500); 18 Jun 2013 19:30:35 -0000 Mailing-List: contact commits-help@ctakes.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@ctakes.apache.org Delivered-To: mailing list commits@ctakes.apache.org Received: (qmail 67267 invoked by uid 99); 18 Jun 2013 19:30:35 -0000 Received: from nike.apache.org (HELO nike.apache.org) (192.87.106.230) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 18 Jun 2013 19:30:35 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 18 Jun 2013 19:30:31 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id B7B172388847; Tue, 18 Jun 2013 19:30:10 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1494265 - in /ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion: eval/ pipelines/ train/ Date: Tue, 18 Jun 2013 19:30:10 -0000 To: commits@ctakes.apache.org From: swu@apache.org X-Mailer: svnmailer-1.0.8-patched Message-Id: <20130618193010.B7B172388847@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: swu Date: Tue Jun 18 19:30:10 2013 New Revision: 1494265 URL: http://svn.apache.org/r1494265 Log: possible to train assertion models with different data sources Added: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/CrossValidateAttributeModels.java ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/TrainAttributeModels.java Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/pipelines/GoldEntityAndAttributeReaderPipelineForSeedCorpus.java Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java?rev=1494265&r1=1494264&r2=1494265&view=diff ============================================================================== --- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java (original) +++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java Tue Jun 18 19:30:10 2013 @@ -237,7 +237,8 @@ protected static Options options = new O public static void main(String[] args) throws Exception { //Options options = new Options(); - options.parseOptions(args); + resetOptions(); + options.parseOptions(args); // System.err.println("forcing skipping of subject processing!!!"); // options.runSubject = false; @@ -342,21 +343,48 @@ protected static Options options = new O CollectionReader trainCollectionReader = evaluation.getCollectionReader(trainFiles); evaluation.train(trainCollectionReader, modelsDir); } - if (testFiles==null || testFiles.size()==0) { - throw new RuntimeException("testFiles = " + testFiles + " testFiles.size() = " + (testFiles==null ? "null": testFiles.size())) ; - } - logger.debug("testFiles.size() = " + testFiles.size()); - CollectionReader testCollectionReader = evaluation.getCollectionReader(testFiles); - Map stats = evaluation.test(testCollectionReader, modelsDir); - AssertionEvaluation.printScore(stats, modelsDir.getAbsolutePath()); + if (!options.trainOnly) { + if (testFiles==null || testFiles.size()==0) { + throw new RuntimeException("testFiles = " + testFiles + " testFiles.size() = " + (testFiles==null ? "null": testFiles.size())) ; + } + logger.debug("testFiles.size() = " + testFiles.size()); + CollectionReader testCollectionReader = evaluation.getCollectionReader(testFiles); + Map stats = evaluation.test(testCollectionReader, modelsDir); + + AssertionEvaluation.printScore(stats, modelsDir.getAbsolutePath()); + } } System.out.println("Finished assertion module."); } - private static void printOptionsForDebugging(Options options) + private static void resetOptions() { + options.ignoreConditional = false; + options.ignoreGeneric = false; + options.ignoreHistory = false; + options.ignorePolarity = false; + options.ignoreSubject = false; + options.ignoreUncertainty = false; + + options.trainOnly = false; + options.testOnly = false; + options.noCleartk = false; + options.printErrors = false; + options.evalOnly = false; + + options.evaluationOutputDirectory = null; + options.trainDirectory = null; + options.testDirectory = null; + options.devDirectory = null; + options.modelsDirectory = null; + options.preprocessDir = null; + + options.crossValidationFolds = null; + } + +private static void printOptionsForDebugging(Options options) { System.out.format( "training dir: %s%n" + @@ -442,6 +470,7 @@ public static void printScore(Map annotationTypes = new ArrayList(); + static { + annotationTypes.add("polarity"); + annotationTypes.add("conditional"); + annotationTypes.add("uncertainty"); + annotationTypes.add("subject"); + annotationTypes.add("generic"); + annotationTypes.add("historyOf"); + } + + // Specify training directories for each attribute in a (semi)colon-separated list, e.g., "sharp_data/dev:sharp_data/train" + private static HashMap trainingDirectories = new HashMap(); + static { + trainingDirectories.put("polarity","sharp_data/dev"); + trainingDirectories.put("conditional","sharp_data/dev"); + trainingDirectories.put("uncertainty","sharp_data/dev"); + trainingDirectories.put("subject","sharp_data/dev"); + trainingDirectories.put("generic","sharp_data/dev"); + trainingDirectories.put("historyOf","sharp_data/dev"); + } + public static void main(String[] args) throws Exception { + + for (String attribute : annotationTypes) { + + ArrayList params = new ArrayList(); + + params.add("--train-dir"); params.add(trainingDirectories.get(attribute)); + params.add("--models-dir"); params.add("sharp_data/model/eval.model"); + params.add("--cross-validation"); params.add("5"); + + // Build up an "ignore" string + for (String ignoreAttribute : annotationTypes) { + if (!ignoreAttribute.equals(attribute)) { + + if (ignoreAttribute.equals("historyOf")) { + ignoreAttribute = ignoreAttribute.substring(0, ignoreAttribute.length()-2); + } + + params.add("--ignore-" + ignoreAttribute); + } + } + String[] paramList = params.toArray(new String[]{}); + + System.out.println(Arrays.asList(paramList).toString()); + + // Run the actual assertion training on just one attribute + AssertionEvaluation.main( paramList ); + } + + + + } +} Added: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/TrainAttributeModels.java URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/TrainAttributeModels.java?rev=1494265&view=auto ============================================================================== --- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/TrainAttributeModels.java (added) +++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/TrainAttributeModels.java Tue Jun 18 19:30:10 2013 @@ -0,0 +1,66 @@ +package org.apache.ctakes.assertion.train; + +import java.util.ArrayList; +import java.util.HashMap; + +import org.apache.ctakes.assertion.eval.AssertionEvaluation; + +import scala.actors.threadpool.Arrays; + +public class TrainAttributeModels { + + private static ArrayList annotationTypes = new ArrayList(); + static { + annotationTypes.add("polarity"); + annotationTypes.add("conditional"); + annotationTypes.add("uncertainty"); + annotationTypes.add("subject"); + annotationTypes.add("generic"); + annotationTypes.add("historyOf"); + } + + // Specify training directories for each attribute in a (semi)colon-separated list, e.g., "sharp_data/dev:sharp_data/train" + private static HashMap trainingDirectories = new HashMap(); + static { + trainingDirectories.put("polarity","sharp_data/train:i2b2_data/train"); + trainingDirectories.put("conditional","sharp_data/train"); + trainingDirectories.put("uncertainty","sharp_data/train"); + trainingDirectories.put("subject","sharp_data/train"); + trainingDirectories.put("generic","sharp_data/train"); + trainingDirectories.put("historyOf","sharp_data/train"); + } + public static void main(String[] args) throws Exception { + + for (String attribute : annotationTypes) { + + ArrayList params = new ArrayList(); + + params.add("--train-dir"); params.add(trainingDirectories.get(attribute)); +// params.add("--test-dir"); params.add("sharp_data/dev"); + params.add("--models-dir"); params.add("sharp_data/model/eval.model"); +// params.add("--evaluation-output-dir"); params.add("sharp_data/output"); + params.add("--train-only"); + + // Build up an "ignore" string + for (String ignoreAttribute : annotationTypes) { + if (!ignoreAttribute.equals(attribute)) { + + if (ignoreAttribute.equals("historyOf")) { + ignoreAttribute = ignoreAttribute.substring(0, ignoreAttribute.length()-2); + } + + params.add("--ignore-" + ignoreAttribute); + } + } + String[] paramList = params.toArray(new String[]{}); + + System.out.println(Arrays.asList(paramList).toString()); + + // Run the actual assertion training on just one attribute + AssertionEvaluation.main( paramList ); + } + + + + } +}