Return-Path: X-Original-To: apmail-ctakes-commits-archive@www.apache.org Delivered-To: apmail-ctakes-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id B31E8102C0 for ; Mon, 4 Nov 2013 16:50:35 +0000 (UTC) Received: (qmail 47403 invoked by uid 500); 4 Nov 2013 16:48:11 -0000 Delivered-To: apmail-ctakes-commits-archive@ctakes.apache.org Received: (qmail 47318 invoked by uid 500); 4 Nov 2013 16:48:01 -0000 Mailing-List: contact commits-help@ctakes.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@ctakes.apache.org Delivered-To: mailing list commits@ctakes.apache.org Received: (qmail 47283 invoked by uid 99); 4 Nov 2013 16:48:00 -0000 Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 04 Nov 2013 16:48:00 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 04 Nov 2013 16:47:59 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id 2ED2B23888FE; Mon, 4 Nov 2013 16:47:39 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1538670 - in /ctakes/trunk: ctakes-context-tokenizer/src/main/java/org/apache/ctakes/contexttokenizer/ae/ ctakes-core/src/main/java/org/apache/ctakes/core/fsm/adapters/ ctakes-core/src/main/java/org/apache/ctakes/core/fsm/machine/ Date: Mon, 04 Nov 2013 16:47:39 -0000 To: commits@ctakes.apache.org From: chenpei@apache.org X-Mailer: svnmailer-1.0.9 Message-Id: <20131104164739.2ED2B23888FE@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: chenpei Date: Mon Nov 4 16:47:38 2013 New Revision: 1538670 URL: http://svn.apache.org/r1538670 Log: CTAKES-253 - YTEX ctakes patches port. Thanks Vijay Garla. * ctakes-context-tokenizer\src\main\java\org\apache\ctakes\contexttokenizer\ae\ContextDependentTokenizerAnnotator.java add null check: changed to avoid NPE in case BaseToken is null. Also ignore newline tokens (they should be treated as whitespace). * ctakes-core\src\main\java\org\apache\ctakes\core\fsm\adapters\NumberTokenAdapter.java add null check: ignore empty numbertokens * ctakes-core\src\main\java\org\apache\ctakes\core\fsm\machine\DateFSM.java Modified to include years in dates Modified: ctakes/trunk/ctakes-context-tokenizer/src/main/java/org/apache/ctakes/contexttokenizer/ae/ContextDependentTokenizerAnnotator.java ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/fsm/adapters/NumberTokenAdapter.java ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/fsm/machine/DateFSM.java Modified: ctakes/trunk/ctakes-context-tokenizer/src/main/java/org/apache/ctakes/contexttokenizer/ae/ContextDependentTokenizerAnnotator.java URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-context-tokenizer/src/main/java/org/apache/ctakes/contexttokenizer/ae/ContextDependentTokenizerAnnotator.java?rev=1538670&r1=1538669&r2=1538670&view=diff ============================================================================== --- ctakes/trunk/ctakes-context-tokenizer/src/main/java/org/apache/ctakes/contexttokenizer/ae/ContextDependentTokenizerAnnotator.java (original) +++ ctakes/trunk/ctakes-context-tokenizer/src/main/java/org/apache/ctakes/contexttokenizer/ae/ContextDependentTokenizerAnnotator.java Mon Nov 4 16:47:38 2013 @@ -18,21 +18,21 @@ */ package org.apache.ctakes.contexttokenizer.ae; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; -import java.util.Set; - -import org.apache.log4j.Logger; -import org.apache.uima.UimaContext; -import org.apache.uima.analysis_component.JCasAnnotator_ImplBase; -import org.apache.uima.analysis_engine.AnalysisEngineProcessException; -import org.apache.uima.cas.FSIterator; -import org.apache.uima.cas.text.AnnotationIndex; -import org.apache.uima.jcas.JCas; -import org.apache.uima.jcas.JFSIndexRepository; -import org.apache.uima.resource.ResourceInitializationException; - +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.Set; + +import org.apache.log4j.Logger; +import org.apache.uima.UimaContext; +import org.apache.uima.analysis_component.JCasAnnotator_ImplBase; +import org.apache.uima.analysis_engine.AnalysisEngineProcessException; +import org.apache.uima.cas.FSIterator; +import org.apache.uima.cas.text.AnnotationIndex; +import org.apache.uima.jcas.JCas; +import org.apache.uima.jcas.JFSIndexRepository; +import org.apache.uima.resource.ResourceInitializationException; + import org.apache.ctakes.core.ae.TokenizerAnnotator; import org.apache.ctakes.core.fsm.adapters.ContractionTokenAdapter; @@ -57,20 +57,21 @@ import org.apache.ctakes.core.fsm.output import org.apache.ctakes.core.fsm.output.RomanNumeralToken; import org.apache.ctakes.core.fsm.output.TimeToken; import org.apache.ctakes.core.fsm.token.BaseToken; -import org.apache.ctakes.typesystem.type.syntax.ContractionToken; -import org.apache.ctakes.typesystem.type.syntax.NewlineToken; -import org.apache.ctakes.typesystem.type.syntax.NumToken; -import org.apache.ctakes.typesystem.type.syntax.PunctuationToken; -import org.apache.ctakes.typesystem.type.syntax.SymbolToken; -import org.apache.ctakes.typesystem.type.syntax.WordToken; -import org.apache.ctakes.typesystem.type.textsem.DateAnnotation; -import org.apache.ctakes.typesystem.type.textsem.FractionAnnotation; -import org.apache.ctakes.typesystem.type.textsem.MeasurementAnnotation; -import org.apache.ctakes.typesystem.type.textsem.PersonTitleAnnotation; -import org.apache.ctakes.typesystem.type.textsem.RangeAnnotation; -import org.apache.ctakes.typesystem.type.textsem.RomanNumeralAnnotation; -import org.apache.ctakes.typesystem.type.textsem.TimeAnnotation; -import org.apache.ctakes.typesystem.type.textspan.Sentence; +import org.apache.ctakes.core.fsm.token.EolToken; +import org.apache.ctakes.typesystem.type.syntax.ContractionToken; +import org.apache.ctakes.typesystem.type.syntax.NewlineToken; +import org.apache.ctakes.typesystem.type.syntax.NumToken; +import org.apache.ctakes.typesystem.type.syntax.PunctuationToken; +import org.apache.ctakes.typesystem.type.syntax.SymbolToken; +import org.apache.ctakes.typesystem.type.syntax.WordToken; +import org.apache.ctakes.typesystem.type.textsem.DateAnnotation; +import org.apache.ctakes.typesystem.type.textsem.FractionAnnotation; +import org.apache.ctakes.typesystem.type.textsem.MeasurementAnnotation; +import org.apache.ctakes.typesystem.type.textsem.PersonTitleAnnotation; +import org.apache.ctakes.typesystem.type.textsem.RangeAnnotation; +import org.apache.ctakes.typesystem.type.textsem.RomanNumeralAnnotation; +import org.apache.ctakes.typesystem.type.textsem.TimeAnnotation; +import org.apache.ctakes.typesystem.type.textspan.Sentence; /** * Finds tokens based on context. @@ -123,7 +124,10 @@ public class ContextDependentTokenizerAn while (btaItr.hasNext()) { org.apache.ctakes.typesystem.type.syntax.BaseToken bta = (org.apache.ctakes.typesystem.type.syntax.BaseToken) btaItr .next(); - baseTokenList.add(adaptToBaseToken(bta)); + // ignore newlines, avoid null tokens + BaseToken bt = adaptToBaseToken(bta); + if(bt != null && !(bt instanceof EolToken)) + baseTokenList.add(bt); } // execute FSM logic Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/fsm/adapters/NumberTokenAdapter.java URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/fsm/adapters/NumberTokenAdapter.java?rev=1538670&r1=1538669&r2=1538670&view=diff ============================================================================== --- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/fsm/adapters/NumberTokenAdapter.java (original) +++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/fsm/adapters/NumberTokenAdapter.java Mon Nov 4 16:47:38 2013 @@ -21,6 +21,8 @@ package org.apache.ctakes.core.fsm.adapt import org.apache.ctakes.core.fsm.token.NumberToken; import org.apache.ctakes.typesystem.type.syntax.NumToken; +import com.google.common.base.Strings; + /** * Adapts JCas token annotation to interface expected by the Context Dependent * Tokenizer. @@ -36,7 +38,7 @@ public class NumberTokenAdapter extends { super(nta); - if (nta.getCoveredText().length() > 0 && nta.getCoveredText().charAt(0) == '-') + if (!Strings.isNullOrEmpty(nta.getCoveredText()) && nta.getCoveredText().length() > 0 && nta.getCoveredText().charAt(0) == '-') { iv_isPositive = false; } Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/fsm/machine/DateFSM.java URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/fsm/machine/DateFSM.java?rev=1538670&r1=1538669&r2=1538670&view=diff ============================================================================== --- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/fsm/machine/DateFSM.java (original) +++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/fsm/machine/DateFSM.java Mon Nov 4 16:47:38 2013 @@ -220,16 +220,16 @@ public class DateFSM { startState.addTransition(new AnyCondition(), startState); monthFullTextState.addTransition(dayNumCondition, dayNumState); - monthFullTextState.addTransition(yearNotDayNumCondition, ntEndState); + monthFullTextState.addTransition(yearNotDayNumCondition, endState); monthFullTextState.addTransition(new AnyCondition(), startState); monthShortTextState.addTransition(dayNumCondition, dayNumState); monthShortTextState.addTransition(periodCondition, periodState); - monthShortTextState.addTransition(yearNotDayNumCondition, ntEndState); + monthShortTextState.addTransition(yearNotDayNumCondition, endState); monthShortTextState.addTransition(new AnyCondition(), startState); periodState.addTransition(dayNumCondition, dayNumState); - periodState.addTransition(yearNotDayNumCondition, ntEndState); + periodState.addTransition(yearNotDayNumCondition, endState); periodState.addTransition(new AnyCondition(), startState); dayNumState.addTransition(yearNumCondition, endState);