Return-Path: X-Original-To: apmail-ctakes-commits-archive@www.apache.org Delivered-To: apmail-ctakes-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 96AA210655 for ; Tue, 29 Oct 2013 21:59:08 +0000 (UTC) Received: (qmail 35919 invoked by uid 500); 29 Oct 2013 21:59:08 -0000 Delivered-To: apmail-ctakes-commits-archive@ctakes.apache.org Received: (qmail 35882 invoked by uid 500); 29 Oct 2013 21:59:08 -0000 Mailing-List: contact commits-help@ctakes.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@ctakes.apache.org Delivered-To: mailing list commits@ctakes.apache.org Received: (qmail 35875 invoked by uid 99); 29 Oct 2013 21:59:08 -0000 Received: from nike.apache.org (HELO nike.apache.org) (192.87.106.230) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 29 Oct 2013 21:59:08 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 29 Oct 2013 21:59:04 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id 067C32388994; Tue, 29 Oct 2013 21:58:43 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1536904 - /ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/TimeXExtractor.java Date: Tue, 29 Oct 2013 21:58:42 -0000 To: commits@ctakes.apache.org From: clin@apache.org X-Mailer: svnmailer-1.0.9 Message-Id: <20131029215843.067C32388994@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: clin Date: Tue Oct 29 21:58:42 2013 New Revision: 1536904 URL: http://svn.apache.org/r1536904 Log: add nearby DateAnnotation and TimeAnnotation of an event for DocTimeRel classification Added: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/TimeXExtractor.java (with props) Added: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/TimeXExtractor.java URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/TimeXExtractor.java?rev=1536904&view=auto ============================================================================== --- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/TimeXExtractor.java (added) +++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/TimeXExtractor.java Tue Oct 29 21:58:42 2013 @@ -0,0 +1,156 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.ctakes.temporal.ae.feature; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Map; +import java.util.TreeMap; +//import java.util.logging.Logger; + +//import org.apache.ctakes.temporal.ae.feature.treekernel.TemporalPETExtractor; +//import org.apache.ctakes.temporal.ae.feature.treekernel.TemporalSingleTreeExtractor; +import org.apache.ctakes.typesystem.type.syntax.TreebankNode; +import org.apache.ctakes.typesystem.type.textsem.DateAnnotation; +import org.apache.ctakes.typesystem.type.textsem.EventMention; +import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation; +import org.apache.ctakes.typesystem.type.textsem.TimeAnnotation; +import org.apache.ctakes.typesystem.type.textsem.TimeMention; +import org.apache.ctakes.typesystem.type.textspan.Sentence; +import org.apache.uima.analysis_engine.AnalysisEngineProcessException; +import org.apache.uima.jcas.JCas; +import org.apache.uima.jcas.tcas.Annotation; +import org.apache.uima.resource.ResourceInitializationException; +import org.cleartk.classifier.Feature; +import org.cleartk.classifier.feature.extractor.CleartkExtractorException; +import org.cleartk.classifier.feature.extractor.simple.SimpleFeatureExtractor; +import org.uimafit.util.JCasUtil; + +public class TimeXExtractor implements SimpleFeatureExtractor { + + private String name; +// private TemporalPETExtractor path; + private TemporalAttributeFeatureExtractor attr; + private TimeWordTypeExtractor timewd; +// private TemporalSingleTreeExtractor treeExt; + +// private Logger logger = Logger.getLogger(this.getClass().getName()); + + public TimeXExtractor() throws ResourceInitializationException { + super(); + this.name = "TimeXFeature"; +// this.path = new TemporalPETExtractor(); + this.attr = new TemporalAttributeFeatureExtractor(); + this.timewd = new TimeWordTypeExtractor(); +// this.treeExt = new TemporalSingleTreeExtractor(); + } + + @Override + public List extract(JCas view, Annotation annotation) throws CleartkExtractorException { + List features = new ArrayList(); + + //1 get covering sentence: + Map> coveringMap = + JCasUtil.indexCovering(view, EventMention.class, Sentence.class); + EventMention targetTokenAnnotation = (EventMention)annotation; + Collection sentList = coveringMap.get(targetTokenAnnotation); + + //2 get TimeX + Map timeDistMap = null; + +// List treePath = new ArrayList(); + + //3 get Document Creation Time +// String sofastr = view.getSofaDataString(); +// int start = sofastr.indexOf("meta rev_date="); +// int end = sofastr.indexOf(" start_date="); +// System.out.println(sofastr.substring(start, end)); +// Collection sources = JCasUtil.select(view, SourceData.class); +// for(SourceData source : sources){ +// System.out.println("original date: "+source.getSourceOriginalDate()); +// System.out.println("revision date: "+source.getSourceRevisionDate()); +// } + + if (sentList != null && !sentList.isEmpty()){ + timeDistMap = new TreeMap(); + + for(Sentence sent : sentList) { + for (TimeMention time : JCasUtil.selectCovered(view, TimeMention.class, sent)) { + timeDistMap.put(Math.abs(time.getBegin() - annotation.getBegin()), time); + } + for (TimeAnnotation time : JCasUtil.selectCovered(view, TimeAnnotation.class, sent)) { + timeDistMap.put(Math.abs(time.getBegin() - annotation.getBegin()), time); + } + for (DateAnnotation time : JCasUtil.selectCovered(view, DateAnnotation.class, sent)) { + timeDistMap.put(Math.abs(time.getBegin() - annotation.getBegin()), time); + } + } + + //get the closest Time Expression feature + for (Map.Entry entry : timeDistMap.entrySet()) { + Feature feature = new Feature(this.name, entry.getValue().getCoveredText()); + features.add(feature); + // logger.info("add time feature: "+ entry.getValue().getCoveredText() + entry.getValue().getTimeClass()); + Feature indicator = new Feature("TimeXNearby", this.name); + features.add(indicator); + Feature type = new Feature("TimeXType", entry.getValue().getClass()); + features.add(type); + + //add PP get Heading preposition + for(TreebankNode treebankNode : JCasUtil.selectCovering( + view, + TreebankNode.class, + entry.getValue().getBegin(), + entry.getValue().getEnd())) { + + if(treebankNode.getNodeType().equals("PP")) { + Feature PPNodeType = new Feature("Timex_PPNodeType", treebankNode.getNodeType()); + features.add(PPNodeType); + break; + } + } + + //add path tree, timex attributes + try { +// treePath=this.path.extract(view, targetTokenAnnotation, entry.getValue());//add path between timex and event + features.addAll(this.attr.extract(view, targetTokenAnnotation, entry.getValue()));//add temporal attribute features + features.addAll(this.timewd.extract(view, entry.getValue())); + } catch (AnalysisEngineProcessException e) { + throw new IllegalArgumentException(String.format("error in gererating path feature:", features)); + } + break; + } + } + +// if (treePath.isEmpty()){ +// try { +// features.addAll(this.treeExt.extract(view, targetTokenAnnotation)); +// } catch (AnalysisEngineProcessException e) { +// throw new IllegalArgumentException(String.format("error in gererating path feature:", features)); +// } +// }else{ +// features.addAll(treePath); +// } + + + return features; + } + +} Propchange: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/TimeXExtractor.java ------------------------------------------------------------------------------ svn:mime-type = text/plain