Return-Path: X-Original-To: apmail-incubator-ctakes-commits-archive@minotaur.apache.org Delivered-To: apmail-incubator-ctakes-commits-archive@minotaur.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 1BF71EFA5 for ; Mon, 14 Jan 2013 14:13:07 +0000 (UTC) Received: (qmail 69126 invoked by uid 500); 14 Jan 2013 14:13:07 -0000 Delivered-To: apmail-incubator-ctakes-commits-archive@incubator.apache.org Received: (qmail 69063 invoked by uid 500); 14 Jan 2013 14:13:05 -0000 Mailing-List: contact ctakes-commits-help@incubator.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: ctakes-dev@incubator.apache.org Delivered-To: mailing list ctakes-commits@incubator.apache.org Received: (qmail 69039 invoked by uid 99); 14 Jan 2013 14:13:04 -0000 Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 14 Jan 2013 14:13:04 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 14 Jan 2013 14:13:03 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id CFA8A2388980; Mon, 14 Jan 2013 14:12:43 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1432925 - in /incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor: ae/Baseline3EntityMentionPairRelationExtractorAnnotator.java eval/RelationExtractorEvaluation.java Date: Mon, 14 Jan 2013 14:12:43 -0000 To: ctakes-commits@incubator.apache.org From: dligach@apache.org X-Mailer: svnmailer-1.0.8-patched Message-Id: <20130114141243.CFA8A2388980@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: dligach Date: Mon Jan 14 14:12:43 2013 New Revision: 1432925 URL: http://svn.apache.org/viewvc?rev=1432925&view=rev Log: Initial version of baseline 3 Added: incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/Baseline3EntityMentionPairRelationExtractorAnnotator.java Modified: incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/RelationExtractorEvaluation.java Added: incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/Baseline3EntityMentionPairRelationExtractorAnnotator.java URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/Baseline3EntityMentionPairRelationExtractorAnnotator.java?rev=1432925&view=auto ============================================================================== --- incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/Baseline3EntityMentionPairRelationExtractorAnnotator.java (added) +++ incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/Baseline3EntityMentionPairRelationExtractorAnnotator.java Mon Jan 14 14:12:43 2013 @@ -0,0 +1,196 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.ctakes.relationextractor.ae; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashSet; +import java.util.List; +import java.util.Map; + +import org.apache.ctakes.typesystem.type.relation.BinaryTextRelation; +import org.apache.ctakes.typesystem.type.syntax.TreebankNode; +import org.apache.ctakes.typesystem.type.textsem.EntityMention; +import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation; +import org.apache.ctakes.typesystem.type.textspan.Sentence; +import org.apache.uima.cas.CAS; +import org.apache.uima.cas.CASException; +import org.apache.uima.cas.FeatureStructure; +import org.apache.uima.jcas.JCas; +import org.apache.uima.jcas.cas.FSArray; +import org.apache.uima.jcas.tcas.Annotation; +import org.cleartk.classifier.Feature; +import org.uimafit.descriptor.ConfigurationParameter; +import org.uimafit.util.JCasUtil; + +/** + * Annotate location_of relation between two entities in sentences containing + * exactly two entities (where the entities are of the correct types). + * This implementation assumes classifyBothDirections is set to true (i.e. + * each pair of entities is considered twice). + */ +public class Baseline3EntityMentionPairRelationExtractorAnnotator extends RelationExtractorAnnotator { + + public static final String PARAM_CLASSIFY_BOTH_DIRECTIONS = "ClassifyBothDirections"; + + @ConfigurationParameter( + name = PARAM_CLASSIFY_BOTH_DIRECTIONS, + mandatory = false, + description = "run the classifier in both directions, that is, classify each pair of events " + + "{X,Y} once in the order X-to-Y and once in the order Y-to-X (default: classify each " + + "pair of events {X, Y} once, giving the label 'R' if a relation exists with the order " + + "X-to-Y, and 'R-1' if a relation exists with the order Y-to-X)") + protected boolean classifyBothDirections = false; + + @Override + public List getCandidateRelationArgumentPairs( + JCas identifiedAnnotationView, Sentence sentence) { + + // collect all possible relation arguments from the sentence + List args = JCasUtil.selectCovered( + identifiedAnnotationView, + EntityMention.class, + sentence); + + // Create pairings (this will change depending on the classification direction) + List pairs = new ArrayList(); + + for (int i = 0; i < args.size(); ++i) { + EntityMention arg1 = args.get(i); + int jStart = this.classifyBothDirections ? 0 : i + 1; + for (int j = jStart; j < args.size(); ++j) { + EntityMention arg2 = args.get(j); + // skip identical entity mentions and mentions with identical spans + if (i == j || (arg1.getBegin() == arg2.getBegin() && arg1.getEnd() == arg2.getEnd())) { + continue; + } + pairs.add(new IdentifiedAnnotationPair(arg1, arg2)); + } + } + + List anatomicalSites = new ArrayList(); + for(EntityMention entityMention : args) { + if(entityMention.getTypeID() == 6) { + anatomicalSites.add(entityMention); + } + } + + if(anatomicalSites.size() == 0) { + return new ArrayList(); + } + + for(EntityMention anatomicalSite : anatomicalSites) { + try { + List treebankNodes = JCasUtil.selectCovering( + identifiedAnnotationView.getView(CAS.NAME_DEFAULT_SOFA), + TreebankNode.class, + anatomicalSite.getBegin(), + anatomicalSite.getEnd()); + for(TreebankNode treebankNode : treebankNodes) { + if(treebankNode.getNodeType().equals("NP")) { + FSArray fsArray = treebankNode.getChildren(); + if(fsArray == null) { + System.out.println("NULL"); + } else { + for(FeatureStructure featureStructure : fsArray.toArray()) { + TreebankNode childNode = (TreebankNode) featureStructure; + System.out.println(sentence.getCoveredText()); + System.out.println("anatomical site: " + anatomicalSite.getCoveredText()); + System.out.println("child node: " + childNode.getCoveredText()); + System.out.println(); + } + } + } + } + } catch (CASException e) { + e.printStackTrace(); + } + } + + // for all other cases, return no entity pairs + return new ArrayList(); + } + + /* + * Are entity types of the arguments valid for location_of? + * The following combinations are allowed: + * + * location-of(anatomical site/6, disorder/2) + * location-of(anatomical site/6, sign/symptom/3) + * location-of(anatomical site/6, procedure/5) + */ + private static boolean validateArgumentTypes(IdentifiedAnnotationPair pair) { + + // allowable arg2 types for location_of + HashSet okArg2Types = new HashSet(Arrays.asList(2, 3, 5)); + + IdentifiedAnnotation arg1 = pair.getArg1(); // Argument (should be anatomical site) + IdentifiedAnnotation arg2 = pair.getArg2(); // Related_to (should be either disorder, sign/symptom, or procedure) + int type1 = arg1.getTypeID(); + int type2 = arg2.getTypeID(); + + if(type1 == 6 && okArg2Types.contains(type2)) { + return true; + } + + return false; + } + + @Override + protected String getRelationCategory(Map, BinaryTextRelation> relationLookup, + IdentifiedAnnotation arg1, IdentifiedAnnotation arg2) { + + BinaryTextRelation relation = relationLookup.get(Arrays.asList(arg1, arg2)); + String category; + if (this.classifyBothDirections) { + // if classifying both directions, we'll see {X, Y} once when X is first and + // once when Y is first, so just do the single direction lookup here + if (relation != null) { + category = relation.getCategory(); + } else if (coin.nextDouble() <= this.probabilityOfKeepingANegativeExample) { + category = NO_RELATION_CATEGORY; + } else { + category = null; + } + } else { + // if classifying in a single direction, we'll see {X, Y} only once, + // so do lookups in both directions, and change the category name for + // the relations in the reverse order + if (relation != null) { + category = relation.getCategory(); + } else { + relation = relationLookup.get(Arrays.asList(arg2, arg1)); + if (relation != null) { + // Change category name to show reverse order + category = relation.getCategory() + "-1"; + } else if (coin.nextDouble() <= this.probabilityOfKeepingANegativeExample) { + category = NO_RELATION_CATEGORY; + } else { + category = null; + } + } + } + return category; + } + + @Override + public String classify(List features) { + return "location_of"; + } +} Modified: incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/RelationExtractorEvaluation.java URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/RelationExtractorEvaluation.java?rev=1432925&r1=1432924&r2=1432925&view=diff ============================================================================== --- incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/RelationExtractorEvaluation.java (original) +++ incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/RelationExtractorEvaluation.java Mon Jan 14 14:12:43 2013 @@ -28,6 +28,15 @@ import java.util.List; import java.util.Map; import java.util.Set; +import org.apache.ctakes.relationextractor.ae.Baseline3EntityMentionPairRelationExtractorAnnotator; +import org.apache.ctakes.relationextractor.ae.DegreeOfRelationExtractorAnnotator; +import org.apache.ctakes.relationextractor.ae.EntityMentionPairRelationExtractorAnnotator; +import org.apache.ctakes.relationextractor.ae.RelationExtractorAnnotator; +import org.apache.ctakes.typesystem.type.relation.BinaryTextRelation; +import org.apache.ctakes.typesystem.type.relation.RelationArgument; +import org.apache.ctakes.typesystem.type.textsem.EntityMention; +import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation; +import org.apache.ctakes.typesystem.type.textsem.Modifier; import org.apache.uima.UIMAFramework; import org.apache.uima.analysis_engine.AnalysisEngine; import org.apache.uima.analysis_engine.AnalysisEngineDescription; @@ -72,15 +81,6 @@ import com.google.common.collect.Maps; import com.google.common.collect.Ordering; import com.google.common.collect.Sets; -import org.apache.ctakes.relationextractor.ae.DegreeOfRelationExtractorAnnotator; -import org.apache.ctakes.relationextractor.ae.EntityMentionPairRelationExtractorAnnotator; -import org.apache.ctakes.relationextractor.ae.RelationExtractorAnnotator; -import org.apache.ctakes.typesystem.type.relation.BinaryTextRelation; -import org.apache.ctakes.typesystem.type.relation.RelationArgument; -import org.apache.ctakes.typesystem.type.textsem.EntityMention; -import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation; -import org.apache.ctakes.typesystem.type.textsem.Modifier; - public class RelationExtractorEvaluation extends Evaluation_ImplBase> { public static class Options extends Options_ImplBase { @@ -138,7 +138,7 @@ public class RelationExtractorEvaluation // parameter settings currently optimized for SHARP data private static final ParameterSettings BEST_DEGREE_OF_PARAMETERS = new ParameterSettings(false, 0.5f, "radial basis function", 10.0, 0.0010); - private static final ParameterSettings BEST_NON_DEGREE_OF_PARAMETERS = new ParameterSettings(false, 1.0f, "radial basis function", 10.0, 0.01); + private static final ParameterSettings BEST_NON_DEGREE_OF_PARAMETERS = new ParameterSettings(true, 1.0f, "radial basis function", 10.0, 0.01); public static void main(String[] args) throws Exception { Options options = new Options(); @@ -163,7 +163,7 @@ public class RelationExtractorEvaluation boolean isDegreeOf = relationCategory.equals("degree_of"); Class annotatorClass = isDegreeOf ? DegreeOfRelationExtractorAnnotator.class - : EntityMentionPairRelationExtractorAnnotator.class; + : Baseline3EntityMentionPairRelationExtractorAnnotator.class; // determine the type of classifier to be trained Class> dataWriterClass = LIBSVMStringOutcomeDataWriter.class;