Return-Path: X-Original-To: apmail-ctakes-commits-archive@www.apache.org Delivered-To: apmail-ctakes-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id DA99D1797C for ; Wed, 29 Oct 2014 14:09:46 +0000 (UTC) Received: (qmail 69653 invoked by uid 500); 29 Oct 2014 14:09:46 -0000 Delivered-To: apmail-ctakes-commits-archive@ctakes.apache.org Received: (qmail 69613 invoked by uid 500); 29 Oct 2014 14:09:46 -0000 Mailing-List: contact commits-help@ctakes.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@ctakes.apache.org Delivered-To: mailing list commits@ctakes.apache.org Received: (qmail 69604 invoked by uid 99); 29 Oct 2014 14:09:46 -0000 Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 29 Oct 2014 14:09:46 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 29 Oct 2014 14:09:45 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id 6764F23888D2; Wed, 29 Oct 2014 14:08:54 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1635138 - in /ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion: attributes/features/SubjectFeaturesExtractor.java medfacts/cleartk/SubjectCleartkAnalysisEngine.java Date: Wed, 29 Oct 2014 14:08:54 -0000 To: commits@ctakes.apache.org From: tmill@apache.org X-Mailer: svnmailer-1.0.9 Message-Id: <20141029140854.6764F23888D2@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: tmill Date: Wed Oct 29 14:08:53 2014 New Revision: 1635138 URL: http://svn.apache.org/r1635138 Log: CTAKES-94: Changed tabs to spaces in subject extractor code. Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/attributes/features/SubjectFeaturesExtractor.java ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/SubjectCleartkAnalysisEngine.java Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/attributes/features/SubjectFeaturesExtractor.java URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/attributes/features/SubjectFeaturesExtractor.java?rev=1635138&r1=1635137&r2=1635138&view=diff ============================================================================== --- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/attributes/features/SubjectFeaturesExtractor.java (original) +++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/attributes/features/SubjectFeaturesExtractor.java Wed Oct 29 14:08:53 2014 @@ -34,47 +34,47 @@ import org.cleartk.ml.feature.extractor. /** SubjectFeaturesExtractor - * Ports the features and classification decisions of the first version (logic) of the subject tool + * Ports the features and classification decisions of the first version (logic) of the subject tool * * @author m081914 * */ public class SubjectFeaturesExtractor implements FeatureExtractor1 { - - - @Override - public List extract(JCas jCas, IdentifiedAnnotation arg) { - - List features = new ArrayList<>(); - - // Pull in general dependency-based features -- externalize to another extractor? - ConllDependencyNode node = DependencyUtility.getNominalHeadNode(jCas, arg); - if (node!= null) { - features.add(new Feature("DEPENDENCY_HEAD", node.getCoveredText())); - features.add(new Feature("DEPENDENCY_HEAD_deprel", node.getDeprel())); - } - - HashMap featsMap = SubjectAttributeClassifier.extract(jCas, arg); + + + @Override + public List extract(JCas jCas, IdentifiedAnnotation arg) { + + List features = new ArrayList<>(); + + // Pull in general dependency-based features -- externalize to another extractor? + ConllDependencyNode node = DependencyUtility.getNominalHeadNode(jCas, arg); + if (node!= null) { + features.add(new Feature("DEPENDENCY_HEAD", node.getCoveredText())); + features.add(new Feature("DEPENDENCY_HEAD_deprel", node.getDeprel())); + } + + HashMap featsMap = SubjectAttributeClassifier.extract(jCas, arg); - if (!featsMap.isEmpty()) { - // Pull in all the features that were used for the rule-based module - features.addAll( hashToFeatureList(featsMap) ); - // Pull in the result of the rule-based module as well - features.add(new Feature("SUBJECT_CLASSIFIER_LOGIC", SubjectAttributeClassifier.classifyWithLogic(featsMap))); - } - - return features; - } + if (!featsMap.isEmpty()) { + // Pull in all the features that were used for the rule-based module + features.addAll( hashToFeatureList(featsMap) ); + // Pull in the result of the rule-based module as well + features.add(new Feature("SUBJECT_CLASSIFIER_LOGIC", SubjectAttributeClassifier.classifyWithLogic(featsMap))); + } + + return features; + } - private static Collection hashToFeatureList( - HashMap featsIn) { - - Collection featsOut = new HashSet<>(); - for (String featName : featsIn.keySet()) { - featsOut.add(new Feature(featName,featsIn.get(featName))); - } - - return featsOut; - } + private static Collection hashToFeatureList( + HashMap featsIn) { + + Collection featsOut = new HashSet<>(); + for (String featName : featsIn.keySet()) { + featsOut.add(new Feature(featName,featsIn.get(featName))); + } + + return featsOut; + } } Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/SubjectCleartkAnalysisEngine.java URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/SubjectCleartkAnalysisEngine.java?rev=1635138&r1=1635137&r2=1635138&view=diff ============================================================================== --- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/SubjectCleartkAnalysisEngine.java (original) +++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/SubjectCleartkAnalysisEngine.java Wed Oct 29 14:08:53 2014 @@ -32,68 +32,68 @@ import org.apache.uima.resource.Resource import org.cleartk.ml.Instance; public class SubjectCleartkAnalysisEngine extends - AssertionCleartkAnalysisEngine { + AssertionCleartkAnalysisEngine { - boolean USE_DEFAULT_EXTRACTORS = false; - - @Override - public void initialize(UimaContext context) throws ResourceInitializationException { - super.initialize(context); - probabilityOfKeepingADefaultExample = 0.1; - - if (this.isTraining() && this.goldViewName == null) { - throw new IllegalArgumentException(PARAM_GOLD_VIEW_NAME + " must be defined during training"); - } - - initialize_subject_extractor(); - initializeFeatureSelection(); - - } - - - private void initialize_subject_extractor() { - this.entityFeatureExtractors.add( new SubjectFeaturesExtractor()); - } - - @Override - public void setClassLabel(IdentifiedAnnotation entityOrEventMention, - Instance instance) throws AnalysisEngineProcessException { - if (this.isTraining()) - { - String subj = entityOrEventMention.getSubject(); - - // downsampling. initialize probabilityOfKeepingADefaultExample to 1.0 for no downsampling - if ("patient".equals(subj) - && coin.nextDouble() >= this.probabilityOfKeepingADefaultExample) { - return; - } - instance.setOutcome(subj); - logger.log(Level.DEBUG, String.format("[%s] expected: ''; actual: ''; features: %s", - this.getClass().getSimpleName(), - instance.toString() - )); - } else - { - String label = this.classifier.classify(instance.getFeatures()); - entityOrEventMention.setSubject(label); - logger.log(Level.DEBUG, "SUBJECT is being set on an IdentifiedAnnotation: "+label+" "+entityOrEventMention.getSubject()); - } - } - public static FeatureSelection createFeatureSelection(double threshold) { - return new Chi2FeatureSelection<>(AssertionCleartkAnalysisEngine.FEATURE_SELECTION_NAME, threshold, false); - } - - public static URI createFeatureSelectionURI(File outputDirectoryName) { - return new File(outputDirectoryName, FEATURE_SELECTION_NAME + "_Chi2_extractor.dat").toURI(); - } - - @Override - protected void initializeFeatureSelection() throws ResourceInitializationException { - if (featureSelectionThreshold == 0) { - this.featureSelection = null; - } else { - this.featureSelection = createFeatureSelection(this.featureSelectionThreshold); - } - } - + boolean USE_DEFAULT_EXTRACTORS = false; + + @Override + public void initialize(UimaContext context) throws ResourceInitializationException { + super.initialize(context); + probabilityOfKeepingADefaultExample = 0.1; + + if (this.isTraining() && this.goldViewName == null) { + throw new IllegalArgumentException(PARAM_GOLD_VIEW_NAME + " must be defined during training"); + } + + initialize_subject_extractor(); + initializeFeatureSelection(); + + } + + + private void initialize_subject_extractor() { + this.entityFeatureExtractors.add( new SubjectFeaturesExtractor()); + } + + @Override + public void setClassLabel(IdentifiedAnnotation entityOrEventMention, + Instance instance) throws AnalysisEngineProcessException { + if (this.isTraining()) + { + String subj = entityOrEventMention.getSubject(); + + // downsampling. initialize probabilityOfKeepingADefaultExample to 1.0 for no downsampling + if ("patient".equals(subj) + && coin.nextDouble() >= this.probabilityOfKeepingADefaultExample) { + return; + } + instance.setOutcome(subj); + logger.log(Level.DEBUG, String.format("[%s] expected: ''; actual: ''; features: %s", + this.getClass().getSimpleName(), + instance.toString() + )); + } else + { + String label = this.classifier.classify(instance.getFeatures()); + entityOrEventMention.setSubject(label); + logger.log(Level.DEBUG, "SUBJECT is being set on an IdentifiedAnnotation: "+label+" "+entityOrEventMention.getSubject()); + } + } + public static FeatureSelection createFeatureSelection(double threshold) { + return new Chi2FeatureSelection<>(AssertionCleartkAnalysisEngine.FEATURE_SELECTION_NAME, threshold, false); + } + + public static URI createFeatureSelectionURI(File outputDirectoryName) { + return new File(outputDirectoryName, FEATURE_SELECTION_NAME + "_Chi2_extractor.dat").toURI(); + } + + @Override + protected void initializeFeatureSelection() throws ResourceInitializationException { + if (featureSelectionThreshold == 0) { + this.featureSelection = null; + } else { + this.featureSelection = createFeatureSelection(this.featureSelectionThreshold); + } + } + }