ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From vjapa...@apache.org
Subject svn commit: r1555218 - in /ctakes/branches/ytex: ctakes-ytex-uima/desc/analysis_engine/ ctakes-ytex-uima/desc/cpe/ ctakes-ytex-uima/src/main/java/org/apache/ctakes/ytex/tools/ ctakes-ytex-uima/src/main/java/org/apache/ctakes/ytex/uima/ ctakes-ytex-uima...
Date Fri, 03 Jan 2014 19:31:44 GMT
Author: vjapache
Date: Fri Jan  3 19:31:43 2014
New Revision: 1555218

URL: http://svn.apache.org/r1555218
Log:
add unit tests
fix annotation viewer
add negexannotator
cleanup unused under desc 

Added:
    ctakes/branches/ytex/ctakes-ytex-uima/desc/analysis_engine/AggregatePlaintextUMLSProcessorMinimal.xml
    ctakes/branches/ytex/ctakes-ytex-uima/desc/cpe/fracture_demo.xml
    ctakes/branches/ytex/ctakes-ytex-uima/src/main/java/org/apache/ctakes/ytex/uima/
    ctakes/branches/ytex/ctakes-ytex-uima/src/main/java/org/apache/ctakes/ytex/uima/ApplicationContextHolder.java
    ctakes/branches/ytex/ctakes-ytex-uima/src/main/java/org/apache/ctakes/ytex/uima/DBCollectionReader.java
    ctakes/branches/ytex/ctakes-ytex-uima/src/main/resources/
    ctakes/branches/ytex/ctakes-ytex-uima/src/main/resources/org/
    ctakes/branches/ytex/ctakes-ytex-uima/src/main/resources/org/apache/
    ctakes/branches/ytex/ctakes-ytex-uima/src/main/resources/org/apache/ctakes/
    ctakes/branches/ytex/ctakes-ytex-uima/src/main/resources/org/apache/ctakes/ytex/
    ctakes/branches/ytex/ctakes-ytex-uima/src/main/resources/org/apache/ctakes/ytex/types/
    ctakes/branches/ytex/ctakes-ytex-uima/src/main/resources/org/apache/ctakes/ytex/types/TypeSystem.xml
    ctakes/branches/ytex/ctakes-ytex-uima/src/test/java/org/apache/ctakes/ytex/uima/TestUtils.java
    ctakes/branches/ytex/ctakes-ytex-uima/src/test/java/org/apache/ctakes/ytex/uima/annotators/DBCollectionReaderTest.java
    ctakes/branches/ytex/ctakes-ytex-uima/src/test/java/org/apache/ctakes/ytex/uima/annotators/SparseDataExporterTest.java
    ctakes/branches/ytex/ctakes-ytex-uima/src/test/resources/log4j.properties
Removed:
    ctakes/branches/ytex/ctakes-ytex-uima/desc/cpe/Coref-resolver_CPE.xml
    ctakes/branches/ytex/ctakes-ytex-uima/desc/cpe/DrugNER_PlainText_CPE.xml
    ctakes/branches/ytex/ctakes-ytex-uima/desc/cpe/FileSystemCollectionReader.xml
    ctakes/branches/ytex/ctakes-ytex-uima/desc/cpe/test_plaintext_ctakes.xml
    ctakes/branches/ytex/ctakes-ytex-uima/desc/cpe/test_plaintext_metamap.xml
Modified:
    ctakes/branches/ytex/ctakes-ytex-uima/desc/analysis_engine/AggregatePlaintextUMLSProcessor.xml
    ctakes/branches/ytex/ctakes-ytex-uima/desc/analysis_engine/NegexAnnotator.xml
    ctakes/branches/ytex/ctakes-ytex-uima/desc/cpe/test_plaintext.xml
    ctakes/branches/ytex/ctakes-ytex-uima/src/main/java/org/apache/ctakes/ytex/tools/DBAnnotationViewerMain.java
    ctakes/branches/ytex/ctakes-ytex/README
    ctakes/branches/ytex/ctakes-ytex/scripts/build-setup.xml
    ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/dao/ConceptDaoImpl.java
    ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/ConceptSimilarityServiceImpl.java

Modified: ctakes/branches/ytex/ctakes-ytex-uima/desc/analysis_engine/AggregatePlaintextUMLSProcessor.xml
URL: http://svn.apache.org/viewvc/ctakes/branches/ytex/ctakes-ytex-uima/desc/analysis_engine/AggregatePlaintextUMLSProcessor.xml?rev=1555218&r1=1555217&r2=1555218&view=diff
==============================================================================
--- ctakes/branches/ytex/ctakes-ytex-uima/desc/analysis_engine/AggregatePlaintextUMLSProcessor.xml (original)
+++ ctakes/branches/ytex/ctakes-ytex-uima/desc/analysis_engine/AggregatePlaintextUMLSProcessor.xml Fri Jan  3 19:31:43 2014
@@ -16,9 +16,12 @@
 			Identical to
 			ctakes-clinical-pipeline/desc/analysis_engine/AggregatePlaintextUMLSProcessor.xml
 			with the following changes:
-			substitute ytex SegmentRegexAnnotator for SimpleSegmentAnnotator;
-			substitute ytex SentenceDetectorAnnotator for SentenceDetectorAnnotator;
-			substitute ytex DictionaryLookupAnnotator for
+			substitute ytex SegmentRegexAnnotator for
+			SimpleSegmentAnnotator;
+			(TODO substitute ytex SentenceDetectorAnnotator for
+			SentenceDetectorAnnotator);
+			substitute ytex DictionaryLookupAnnotator
+			for
 			DictionaryLookupAnnotatorUMLS
 			add SenseDisambiguatorAnnotator
 		</description>
@@ -46,7 +49,13 @@
 			<import location="../../../ctakes-ne-contexts/desc/NegationAnnotator.xml" />
 		</delegateAnalysisEngine>
 		<delegateAnalysisEngine key="SentenceDetectorAnnotator">
-			<import location="./SentenceDetectorAnnotator.xml" />
+		<!-- 
+		With newlines in sentences CoNLL dependency goes into infinite loop for some reason.
+		Needs to be investigated
+		<import location="./SentenceDetectorAnnotator.xml" />
+		 -->
+		 <import
+				location="../../../ctakes-core/desc/analysis_engine/SentenceDetectorAnnotator.xml" />
 		</delegateAnalysisEngine>
 		<delegateAnalysisEngine key="SenseDisambiguatorAnnotator">
 			<import location="./SenseDisambiguatorAnnotator.xml" />
@@ -86,6 +95,10 @@
 			<import
 				location="../../../ctakes-dependency-parser/desc/analysis_engine/ClearNLPSemanticRoleLabelerAE.xml" />
 		</delegateAnalysisEngine>
+		<delegateAnalysisEngine key="ExtractionPrepAnnotator">
+			<import
+				location="../../../ctakes-clinical-pipeline/desc/analysis_engine/ExtractionPrepAnnotator.xml" />
+		</delegateAnalysisEngine>
 	</delegateAnalysisEngineSpecifiers>
 	<analysisEngineMetaData>
 		<name>AggregatePlaintextUMLSProcessor</name>
@@ -93,8 +106,10 @@
 			documents in plain text format using the built in UMLS (SNOMEDCT and
 			RxNORM) dictionaries. This uses the dictionary
 			lookup/desc/DictionaryLookupAnnotatorUMLS.xml
-			and requires an UMLS license. Please update
-			DictionaryLookupAnnotatorUMLS.xml file with your UMLS username and
+			and requires an UMLS
+			license. Please update
+			DictionaryLookupAnnotatorUMLS.xml file with
+			your UMLS username and
 			password.
 		</description>
 		<version />
@@ -134,8 +149,11 @@
 				<node>LookupWindowAnnotator</node>
 				<node>DictionaryLookupAnnotatorDB</node>
 				<node>SenseDisambiguatorAnnotator</node>
-				<!-- <node>DependencyParser</node> <node>SemanticRoleLabeler</node> <node>AssertionAnnotator</node> 
-					<node>StatusAnnotator</node> <node>NegationAnnotator</node> <node>ExtractionPrepAnnotator</node> -->
+				<node>DependencyParser</node>
+				<node>SemanticRoleLabeler</node>
+				<node>AssertionAnnotator</node>
+				<!-- <node>StatusAnnotator</node> <node>NegationAnnotator</node> -->
+				<node>ExtractionPrepAnnotator</node>
 			</fixedFlow>
 		</flowConstraints>
 		<typePriorities>

Added: ctakes/branches/ytex/ctakes-ytex-uima/desc/analysis_engine/AggregatePlaintextUMLSProcessorMinimal.xml
URL: http://svn.apache.org/viewvc/ctakes/branches/ytex/ctakes-ytex-uima/desc/analysis_engine/AggregatePlaintextUMLSProcessorMinimal.xml?rev=1555218&view=auto
==============================================================================
--- ctakes/branches/ytex/ctakes-ytex-uima/desc/analysis_engine/AggregatePlaintextUMLSProcessorMinimal.xml (added)
+++ ctakes/branches/ytex/ctakes-ytex-uima/desc/analysis_engine/AggregatePlaintextUMLSProcessorMinimal.xml Fri Jan  3 19:31:43 2014
@@ -0,0 +1,240 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor 
+	license agreements. See the NOTICE file distributed with this work for additional 
+	information regarding copyright ownership. The ASF licenses this file to 
+	you under the Apache License, Version 2.0 (the "License"); you may not use 
+	this file except in compliance with the License. You may obtain a copy of 
+	the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required 
+	by applicable law or agreed to in writing, software distributed under the 
+	License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS 
+	OF ANY KIND, either express or implied. See the License for the specific 
+	language governing permissions and limitations under the License. -->
+<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
+	<frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+	<analysisEngineMetaData>
+		<description>
+			Identical to
+			ctakes-clinical-pipeline/desc/analysis_engine/AggregatePlaintextUMLSProcessor.xml
+			with the following changes:
+			substitute ytex SegmentRegexAnnotator for
+			SimpleSegmentAnnotator;
+			substitute ytex SentenceDetectorAnnotator for
+			SentenceDetectorAnnotator;
+			substitute ytex DictionaryLookupAnnotator
+			for
+			DictionaryLookupAnnotatorUMLS;
+			add SenseDisambiguatorAnnotator;
+			Remove LvgAnnotator, DependencyParser, SemanticRoleLabeler,
+			AssertionAnnotator,
+			StatusAnnotator, NegationAnnotator,
+			ExtractionPrepAnnotator;
+			Add NegexAnnotator
+		</description>
+	</analysisEngineMetaData>
+	<primitive>false</primitive>
+	<delegateAnalysisEngineSpecifiers>
+		<delegateAnalysisEngine key="Chunker">
+			<import location="../../../ctakes-chunker/desc/Chunker.xml" />
+		</delegateAnalysisEngine>
+		<delegateAnalysisEngine key="TokenizerAnnotator">
+			<import
+				location="../../../ctakes-core/desc/analysis_engine/TokenizerAnnotator.xml" />
+		</delegateAnalysisEngine>
+		<delegateAnalysisEngine key="ContextDependentTokenizerAnnotator">
+			<import
+				location="../../../ctakes-context-tokenizer/desc/analysis_engine/ContextDependentTokenizerAnnotator.xml" />
+		</delegateAnalysisEngine>
+		<delegateAnalysisEngine key="DictionaryLookupAnnotatorDB">
+			<import location="./DictionaryLookupAnnotator.xml" />
+		</delegateAnalysisEngine>
+		<delegateAnalysisEngine key="SentenceDetectorAnnotator">
+			<import location="./SentenceDetectorAnnotator.xml" />
+		</delegateAnalysisEngine>
+		<delegateAnalysisEngine key="SenseDisambiguatorAnnotator">
+			<import location="./SenseDisambiguatorAnnotator.xml" />
+		</delegateAnalysisEngine>
+		<delegateAnalysisEngine key="LookupWindowAnnotator">
+			<import
+				location="../../../ctakes-clinical-pipeline/desc/analysis_engine/LookupWindowAnnotator.xml" />
+		</delegateAnalysisEngine>
+		<delegateAnalysisEngine key="AdjustNounPhraseToIncludeFollowingNP">
+			<import
+				location="../../../ctakes-chunker/desc/AdjustNounPhraseToIncludeFollowingNP.xml" />
+		</delegateAnalysisEngine>
+		<delegateAnalysisEngine key="AdjustNounPhraseToIncludeFollowingPPNP">
+			<import
+				location="../../../ctakes-chunker/desc/AdjustNounPhraseToIncludeFollowingPPNP.xml" />
+		</delegateAnalysisEngine>
+		<delegateAnalysisEngine key="SegmentRegexAnnotator">
+			<import location="SegmentRegexAnnotator.xml" />
+		</delegateAnalysisEngine>
+		<delegateAnalysisEngine key="POSTagger">
+			<import location="../../../ctakes-pos-tagger/desc/POSTagger.xml" />
+		</delegateAnalysisEngine>
+		<delegateAnalysisEngine key="NegexAnnotator">
+			<import location="./NegexAnnotator.xml" />
+		</delegateAnalysisEngine>
+	</delegateAnalysisEngineSpecifiers>
+	<analysisEngineMetaData>
+		<name>AggregatePlaintextUMLSProcessor</name>
+		<description>Runs the complete pipeline for annotating clinical
+			documents in plain text format using the built in UMLS (SNOMEDCT and
+			RxNORM) dictionaries. This uses the dictionary
+			lookup/desc/DictionaryLookupAnnotatorUMLS.xml
+			and requires an UMLS
+			license. Please update
+			DictionaryLookupAnnotatorUMLS.xml file with
+			your UMLS username and
+			password.
+		</description>
+		<version />
+		<vendor />
+		<configurationParameters searchStrategy="language_fallback">
+			<configurationParameter>
+				<name>ChunkCreatorClass</name>
+				<type>String</type>
+				<multiValued>false</multiValued>
+				<mandatory>true</mandatory>
+				<overrides>
+					<parameter>Chunker/ChunkCreatorClass</parameter>
+				</overrides>
+			</configurationParameter>
+		</configurationParameters>
+		<configurationParameterSettings>
+			<nameValuePair>
+				<name>ChunkCreatorClass</name>
+				<value>
+					<string>org.apache.ctakes.chunker.ae.PhraseTypeChunkCreator
+					</string>
+				</value>
+			</nameValuePair>
+		</configurationParameterSettings>
+		<flowConstraints>
+			<fixedFlow>
+				<node>SegmentRegexAnnotator</node>
+				<node>SentenceDetectorAnnotator</node>
+				<node>TokenizerAnnotator</node>
+				<node>ContextDependentTokenizerAnnotator</node>
+				<node>POSTagger</node>
+				<node>Chunker</node>
+				<node>AdjustNounPhraseToIncludeFollowingNP</node>
+				<node>AdjustNounPhraseToIncludeFollowingPPNP</node>
+				<node>LookupWindowAnnotator</node>
+				<node>DictionaryLookupAnnotatorDB</node>
+				<node>SenseDisambiguatorAnnotator</node>
+				<node>NegexAnnotator</node>
+			</fixedFlow>
+		</flowConstraints>
+		<typePriorities>
+			<name>Ordering</name>
+			<description>For subiterator</description>
+			<version>1.0</version>
+			<priorityList>
+				<type>org.apache.ctakes.typesystem.type.textspan.Segment</type>
+				<type>org.apache.ctakes.typesystem.type.textspan.Sentence</type>
+				<type>org.apache.ctakes.typesystem.type.syntax.BaseToken</type>
+			</priorityList>
+			<priorityList>
+				<type>org.apache.ctakes.typesystem.type.textspan.Sentence</type>
+				<type>org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation
+				</type>
+			</priorityList>
+		</typePriorities>
+		<fsIndexCollection />
+		<capabilities>
+			<capability>
+				<inputs />
+				<outputs>
+					<type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.NewlineToken
+					</type>
+					<type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation
+					</type>
+					<type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.WordToken
+					</type>
+					<type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.VP
+					</type>
+					<type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.refsem.UmlsConcept
+					</type>
+					<type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.UCP
+					</type>
+					<type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.textsem.TimeAnnotation
+					</type>
+					<type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.SymbolToken
+					</type>
+					<type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.textspan.Sentence
+					</type>
+					<type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.textspan.Segment
+					</type>
+					<type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.SBAR
+					</type>
+					<type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.textsem.RomanNumeralAnnotation
+					</type>
+					<type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.textsem.RangeAnnotation
+					</type>
+					<type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.PunctuationToken
+					</type>
+					<type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.Property
+					</type>
+					<type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.Properties
+					</type>
+					<type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.textsem.PersonTitleAnnotation
+					</type>
+					<type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.PRT
+					</type>
+					<type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.PP
+					</type>
+					<type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.OntologyConcept
+					</type>
+					<type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.NumToken
+					</type>
+					<type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.NP
+					</type>
+					<type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.textsem.MeasurementAnnotation
+					</type>
+					<type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.textspan.LookupWindowAnnotation
+					</type>
+					<type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.Lemma
+					</type>
+					<type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.LST
+					</type>
+					<type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.INTJ
+					</type>
+					<type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.textsem.FractionAnnotation
+					</type>
+					<type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.structured.DocumentID
+					</type>
+					<type allAnnotatorFeatures="true">uima.tcas.DocumentAnnotation</type>
+					<type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.textsem.DateAnnotation
+					</type>
+					<type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.CopySrcAnnotation
+					</type>
+					<type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.CopyDestAnnotation
+					</type>
+					<type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.ContractionToken
+					</type>
+					<type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.textsem.ContextAnnotation
+					</type>
+					<type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.Chunk
+					</type>
+					<type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.CONJP
+					</type>
+					<type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.BaseToken
+					</type>
+					<type allAnnotatorFeatures="true">uima.cas.AnnotationBase</type>
+					<type allAnnotatorFeatures="true">uima.tcas.Annotation</type>
+					<type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.ADVP
+					</type>
+					<type allAnnotatorFeatures="true">org.apache.ctakes.typesystem.type.syntax.ADJP
+					</type>
+				</outputs>
+				<languagesSupported />
+			</capability>
+		</capabilities>
+		<operationalProperties>
+			<modifiesCas>true</modifiesCas>
+			<multipleDeploymentAllowed>true</multipleDeploymentAllowed>
+			<outputsNewCASes>false</outputsNewCASes>
+		</operationalProperties>
+	</analysisEngineMetaData>
+	<resourceManagerConfiguration />
+</analysisEngineDescription>

Modified: ctakes/branches/ytex/ctakes-ytex-uima/desc/analysis_engine/NegexAnnotator.xml
URL: http://svn.apache.org/viewvc/ctakes/branches/ytex/ctakes-ytex-uima/desc/analysis_engine/NegexAnnotator.xml?rev=1555218&r1=1555217&r2=1555218&view=diff
==============================================================================
--- ctakes/branches/ytex/ctakes-ytex-uima/desc/analysis_engine/NegexAnnotator.xml (original)
+++ ctakes/branches/ytex/ctakes-ytex-uima/desc/analysis_engine/NegexAnnotator.xml Fri Jan  3 19:31:43 2014
@@ -1,60 +1,56 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
-  <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
-  <primitive>true</primitive>
-  <annotatorImplementationName>ytex.uima.annotators.NegexAnnotator</annotatorImplementationName>
-  <analysisEngineMetaData>
-    <name>NegexAnnotator</name>
-    <description>NegexAnnotator based on the original negex java code.</description>
-    <version>1.0</version>
-    <vendor/>
-    <configurationParameters>
-      <configurationParameter>
-        <name>negatePossibilities</name>
-        <type>Boolean</type>
-        <multiValued>false</multiValued>
-        <mandatory>false</mandatory>
-      </configurationParameter>
-      <configurationParameter>
-        <name>targetTypeName</name>
-        <type>String</type>
-        <multiValued>false</multiValued>
-        <mandatory>false</mandatory>
-      </configurationParameter>
-    </configurationParameters>
-    <configurationParameterSettings>
-      <nameValuePair>
-        <name>negatePossibilities</name>
-        <value>
-          <boolean>true</boolean>
-        </value>
-      </nameValuePair>
-      <nameValuePair>
-        <name>targetTypeName</name>
-        <value>
-          <string>ytex.uima.types.WordToken</string>
-        </value>
-      </nameValuePair>
-    </configurationParameterSettings>
-    <typeSystemDescription>
-      <imports>
-        <import name="ytex.uima.YTEXTypes"/>
-      </imports>
-    </typeSystemDescription>
-    <typePriorities/>
-    <fsIndexCollection/>
-    <capabilities>
-      <capability>
-        <inputs/>
-        <outputs/>
-        <languagesSupported/>
-      </capability>
-    </capabilities>
-    <operationalProperties>
-      <modifiesCas>true</modifiesCas>
-      <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
-      <outputsNewCASes>false</outputsNewCASes>
-    </operationalProperties>
-  </analysisEngineMetaData>
-  <resourceManagerConfiguration/>
+	<frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+	<primitive>true</primitive>
+	<annotatorImplementationName>org.apache.ctakes.ytex.uima.annotators.NegexAnnotator
+	</annotatorImplementationName>
+	<analysisEngineMetaData>
+		<name>NegexAnnotator</name>
+		<description>NegexAnnotator based on the original negex java code.
+		</description>
+		<version>1.0</version>
+		<vendor />
+		<configurationParameters>
+			<configurationParameter>
+				<name>negatePossibilities</name>
+				<type>Boolean</type>
+				<multiValued>false</multiValued>
+				<mandatory>false</mandatory>
+			</configurationParameter>
+			<configurationParameter>
+				<name>targetTypeName</name>
+				<type>String</type>
+				<multiValued>false</multiValued>
+				<mandatory>false</mandatory>
+			</configurationParameter>
+		</configurationParameters>
+		<configurationParameterSettings>
+			<nameValuePair>
+				<name>negatePossibilities</name>
+				<value>
+					<boolean>true</boolean>
+				</value>
+			</nameValuePair>
+		</configurationParameterSettings>
+		<typeSystemDescription>
+			<imports>
+				<import name="org.apache.ctakes.ytex.types.TypeSystem" />
+			</imports>
+		</typeSystemDescription>
+		<typePriorities />
+		<fsIndexCollection />
+		<capabilities>
+			<capability>
+				<inputs />
+				<outputs />
+				<languagesSupported />
+			</capability>
+		</capabilities>
+		<operationalProperties>
+			<modifiesCas>true</modifiesCas>
+			<multipleDeploymentAllowed>true</multipleDeploymentAllowed>
+			<outputsNewCASes>false</outputsNewCASes>
+		</operationalProperties>
+	</analysisEngineMetaData>
+	<resourceManagerConfiguration />
 </analysisEngineDescription>

Added: ctakes/branches/ytex/ctakes-ytex-uima/desc/cpe/fracture_demo.xml
URL: http://svn.apache.org/viewvc/ctakes/branches/ytex/ctakes-ytex-uima/desc/cpe/fracture_demo.xml?rev=1555218&view=auto
==============================================================================
--- ctakes/branches/ytex/ctakes-ytex-uima/desc/cpe/fracture_demo.xml (added)
+++ ctakes/branches/ytex/ctakes-ytex-uima/desc/cpe/fracture_demo.xml Fri Jan  3 19:31:43 2014
@@ -0,0 +1,50 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<cpeDescription xmlns="http://uima.apache.org/resourceSpecifier">
+    <collectionReader>
+        <collectionIterator>
+            <descriptor>
+                <import location="../collection_reader/DBCollectionReader.xml"/>
+            </descriptor>
+            <configurationParameterSettings>
+                <nameValuePair>
+                    <name>queryGetDocumentKeys</name>
+                    <value>
+                        <string>select note_id instance_id from fracture_demo</string>
+                    </value>
+                </nameValuePair>
+                <nameValuePair>
+                    <name>queryGetDocument</name>
+                    <value>
+                        <string>select note_text from fracture_demo where note_id = :instance_id</string>
+                    </value>
+                </nameValuePair>
+            </configurationParameterSettings>
+        </collectionIterator>
+    </collectionReader>
+	<casProcessors casPoolSize="3" processingUnitThreadCount="1">
+        <casProcessor deployment="integrated" name="YTEX AggregatePlaintextUMLSProcessor">
+            <descriptor>
+                <import location="../analysis_engine/AggregatePlaintextUMLSProcessor.xml"/>
+            </descriptor>
+            <deploymentParameters/>
+            <errorHandling>
+                <errorRateThreshold action="terminate" value="0/1000"/>
+                <maxConsecutiveRestarts action="terminate" value="30"/>
+                <timeout max="100000" default="-1"/>
+            </errorHandling>
+            <checkpoint batch="10000" time="1000ms"/>
+        </casProcessor>
+		<casProcessor deployment="integrated" name="YTEX DBConsumer">
+			<descriptor>
+                <import location="../analysis_engine/DBConsumer.xml"/>
+			</descriptor>
+			<checkpoint batch="10000" time="1000ms" />
+		</casProcessor>
+	</casProcessors>
+	<cpeConfig>
+		<numToProcess>-1</numToProcess>
+		<deployAs>immediate</deployAs>
+		<checkpoint batch="0" time="300000" />
+		<timerImpl />
+	</cpeConfig>
+</cpeDescription>

Modified: ctakes/branches/ytex/ctakes-ytex-uima/desc/cpe/test_plaintext.xml
URL: http://svn.apache.org/viewvc/ctakes/branches/ytex/ctakes-ytex-uima/desc/cpe/test_plaintext.xml?rev=1555218&r1=1555217&r2=1555218&view=diff
==============================================================================
--- ctakes/branches/ytex/ctakes-ytex-uima/desc/cpe/test_plaintext.xml (original)
+++ ctakes/branches/ytex/ctakes-ytex-uima/desc/cpe/test_plaintext.xml Fri Jan  3 19:31:43 2014
@@ -1,38 +1,36 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <cpeDescription xmlns="http://uima.apache.org/resourceSpecifier">
-	<collectionReader>
-		<collectionIterator>
-			<descriptor>
-				<import
-					name="coredesc.collection_reader.FilesInDirectoryCollectionReader" />
-			</descriptor>
-			<configurationParameterSettings>
-				<nameValuePair>
-					<name>InputDirectory</name>
-					<value>
-						<string>examples/pubmed/abstracts</string>
-					</value>
-				</nameValuePair>
-			</configurationParameterSettings>
-		</collectionIterator>
-	</collectionReader>
+    <collectionReader>
+        <collectionIterator>
+            <descriptor>
+                <import location="../../../ctakes-core/desc/collection_reader/FilesInDirectoryCollectionReader.xml"/>
+            </descriptor>
+            <configurationParameterSettings>
+                <nameValuePair>
+                    <name>InputDirectory</name>
+                    <value>
+                        <string>resources/data/text-files</string>
+                    </value>
+                </nameValuePair>
+            </configurationParameterSettings>
+        </collectionIterator>
+    </collectionReader>
 	<casProcessors casPoolSize="3" processingUnitThreadCount="1">
-		<casProcessor deployment="integrated" name="AggregatePlaintextUMLSProcessor">
-			<descriptor>
-				<import name="ytex.cdpdesc.analysis_engine.AggregatePlaintextUMLSProcessor" />
-			</descriptor>
-			<deploymentParameters />
-			<errorHandling>
-				<errorRateThreshold action="terminate" value="0/1000" />
-				<maxConsecutiveRestarts action="terminate"
-					value="30" />
-				<timeout max="100000" default="-1" />
-			</errorHandling>
-			<checkpoint batch="10000" time="1000ms" />
-		</casProcessor>
+        <casProcessor deployment="integrated" name="YTEX AggregatePlaintextUMLSProcessor">
+            <descriptor>
+                <import location="../analysis_engine/AggregatePlaintextUMLSProcessor.xml"/>
+            </descriptor>
+            <deploymentParameters/>
+            <errorHandling>
+                <errorRateThreshold action="terminate" value="0/1000"/>
+                <maxConsecutiveRestarts action="terminate" value="30"/>
+                <timeout max="100000" default="-1"/>
+            </errorHandling>
+            <checkpoint batch="10000" time="1000ms"/>
+        </casProcessor>
 		<casProcessor deployment="integrated" name="YTEX DBConsumer">
 			<descriptor>
-				<import name="ytex.uima.annotators.DBConsumer" />
+                <import location="../analysis_engine/DBConsumer.xml"/>
 			</descriptor>
 			<checkpoint batch="10000" time="1000ms" />
 		</casProcessor>

Modified: ctakes/branches/ytex/ctakes-ytex-uima/src/main/java/org/apache/ctakes/ytex/tools/DBAnnotationViewerMain.java
URL: http://svn.apache.org/viewvc/ctakes/branches/ytex/ctakes-ytex-uima/src/main/java/org/apache/ctakes/ytex/tools/DBAnnotationViewerMain.java?rev=1555218&r1=1555217&r2=1555218&view=diff
==============================================================================
--- ctakes/branches/ytex/ctakes-ytex-uima/src/main/java/org/apache/ctakes/ytex/tools/DBAnnotationViewerMain.java (original)
+++ ctakes/branches/ytex/ctakes-ytex-uima/src/main/java/org/apache/ctakes/ytex/tools/DBAnnotationViewerMain.java Fri Jan  3 19:31:43 2014
@@ -67,6 +67,7 @@ import org.apache.uima.resource.metadata
 import org.apache.uima.resource.metadata.TypeSystemDescription;
 import org.apache.uima.tools.docanalyzer.AnnotationViewerDialog;
 import org.apache.uima.tools.docanalyzer.PrefsMediator;
+import org.apache.uima.tools.images.Images;
 import org.apache.uima.tools.stylemap.ColorParser;
 import org.apache.uima.tools.stylemap.StyleMapEntry;
 import org.apache.uima.tools.util.gui.AboutDialog;
@@ -90,8 +91,7 @@ import org.xml.sax.SAXException;
  * Main Annotation Viewer GUI. Allows user to choose directory of XCAS or XMI
  * files, then launches the AnnotationViewerDialog.
  * 
- * copied from AnnotationViewerMain.
- * Modified to load CAS from database.
+ * copied from AnnotationViewerMain. Modified to load CAS from database.
  */
 public class DBAnnotationViewerMain extends JFrame {
 	private static final long serialVersionUID = -3201723535833938833L;
@@ -176,11 +176,7 @@ public class DBAnnotationViewerMain exte
 
 		// Set frame icon image
 		try {
-			// this.setIconImage(Images.getImage(Images.MICROSCOPE));
-			// new
-			// ImageIcon(getClass().getResource(FRAME_ICON_IMAGE)).getImage());
-			this.setIconImage(ImageIO.read(this.getClass().getResource(
-					"/ytex/tools/docanalyzer/icon.gif")));
+			 this.setIconImage(Images.getImage(Images.MICROSCOPE));
 		} catch (IOException e) {
 			System.err.println("Image could not be loaded: " + e.getMessage());
 		}
@@ -302,7 +298,7 @@ public class DBAnnotationViewerMain exte
 
 		// add banner
 		JLabel banner = new JLabel(new ImageIcon(this.getClass().getResource(
-				"/ytex/tools/docanalyzer/logo.gif")));
+				"/org/apache/ctakes/ctakes_logo.jpg")));
 		contentPanel.add(banner, BorderLayout.NORTH);
 
 		// Add the view Button to run TAE
@@ -363,16 +359,16 @@ public class DBAnnotationViewerMain exte
 				casDescriptor = CasCreationUtils
 						.createCas((AnalysisEngineDescription) descriptor);
 				styleMapFile = getStyleMapFile(
-						(AnalysisEngineDescription) descriptor, descriptorFile
-								.getPath());
+						(AnalysisEngineDescription) descriptor,
+						descriptorFile.getPath());
 			} else if (descriptor instanceof TypeSystemDescription) {
 				TypeSystemDescription tsDesc = (TypeSystemDescription) descriptor;
 				tsDesc.resolveImports();
 				casDescriptor = CasCreationUtils.createCas(tsDesc, null,
 						new FsIndexDescription[0]);
 				styleMapFile = getStyleMapFile(
-						(TypeSystemDescription) descriptor, descriptorFile
-								.getPath());
+						(TypeSystemDescription) descriptor,
+						descriptorFile.getPath());
 			} else {
 				displayError("Invalid Descriptor File \""
 						+ descriptorFile.getPath()
@@ -399,8 +395,8 @@ public class DBAnnotationViewerMain exte
 		 * viewerDialog.pack(); viewerDialog.setModal(true);
 		 * viewerDialog.setVisible(true);
 		 */
-		this.launchThatViewer(this.documentIDField.getText(), casDescriptor
-				.getTypeSystem(), null, javaViewerRB.isSelected(),
+		this.launchThatViewer(this.documentIDField.getText(),
+				casDescriptor.getTypeSystem(), null, javaViewerRB.isSelected(),
 				javaViewerUCRB.isSelected(), xmlRB.isSelected(), styleMapFile,
 				createTempDir());
 	}
@@ -485,8 +481,8 @@ public class DBAnnotationViewerMain exte
 	 */
 	public void savePreferences() {
 		// prefs.put("inDir", inputFileSelector.getSelected());
-		prefs.put("taeDescriptorFile", this.taeDescriptorFileSelector
-				.getSelected());
+		prefs.put("taeDescriptorFile",
+				this.taeDescriptorFileSelector.getSelected());
 	}
 
 	/**
@@ -532,8 +528,8 @@ public class DBAnnotationViewerMain exte
 			}
 		}
 
-		JOptionPane.showMessageDialog(DBAnnotationViewerMain.this, buf
-				.toString(), "Error", JOptionPane.ERROR_MESSAGE);
+		JOptionPane.showMessageDialog(DBAnnotationViewerMain.this,
+				buf.toString(), "Error", JOptionPane.ERROR_MESSAGE);
 	}
 
 	/**
@@ -603,8 +599,7 @@ public class DBAnnotationViewerMain exte
 				if (javaViewerUCRBisSelected)
 					getColorsForTypesFromFile(viewer, styleMapFile);
 				else
-					viewer
-							.setHiddenTypes(new String[] { "uima.cpm.FileLocation" });
+					viewer.setHiddenTypes(new String[] { "uima.cpm.FileLocation" });
 				// launch viewer in a new dialog
 				viewer.setCAS(cas);
 				JDialog dialog = new JDialog(this,
@@ -685,13 +680,12 @@ public class DBAnnotationViewerMain exte
 		} else {
 			String taeDir = prefsMed.getTAEfile();
 			JFileChooser chooser = new JFileChooser(taeDir);
-			chooser
-					.setDialogTitle("Select the Analysis Engine that Generated this Output");
+			chooser.setDialogTitle("Select the Analysis Engine that Generated this Output");
 			chooser.setFileSelectionMode(JFileChooser.FILES_ONLY);
 			int returnVal = chooser.showOpenDialog(this);
 			if (returnVal == JFileChooser.APPROVE_OPTION) {
-				XMLInputSource in = new XMLInputSource(chooser
-						.getSelectedFile());
+				XMLInputSource in = new XMLInputSource(
+						chooser.getSelectedFile());
 				return UIMAFramework.getXMLParser()
 						.parseAnalysisEngineDescription(in);
 			} else {
@@ -819,7 +813,8 @@ public class DBAnnotationViewerMain exte
 	private Properties loadJDBCProperties() throws IOException {
 		InputStream is = null;
 		try {
-			is = this.getClass().getResourceAsStream("org/apache/ctakes/ytex/ytex.properties");
+			is = this.getClass().getResourceAsStream(
+					"/org/apache/ctakes/ytex/ytex.properties");
 			this.jdbcProperties = new Properties();
 			this.jdbcProperties.load(is);
 			// make sure required properties are specified
@@ -856,16 +851,16 @@ public class DBAnnotationViewerMain exte
 		Properties jdbcProperties = loadJDBCProperties();
 
 		CAS cas = CasCreationUtils.createCas(Collections.EMPTY_LIST,
-				typeSystem, UIMAFramework
-						.getDefaultPerformanceTuningProperties());
+				typeSystem,
+				UIMAFramework.getDefaultPerformanceTuningProperties());
 		try {
 			Class.forName(jdbcProperties.getProperty("db.driver"));
-			conn = DriverManager.getConnection(jdbcProperties
-					.getProperty("db.url"), jdbcProperties
-					.containsKey("db.username") ? jdbcProperties
-					.getProperty("db.username") : null, jdbcProperties
-					.containsKey("db.password") ? jdbcProperties
-					.getProperty("db.password") : null);
+			conn = DriverManager.getConnection(
+					jdbcProperties.getProperty("db.url"),
+					jdbcProperties.containsKey("db.username") ? jdbcProperties
+							.getProperty("db.username") : null,
+					jdbcProperties.containsKey("db.password") ? jdbcProperties
+							.getProperty("db.password") : null);
 			String strSQL = jdbcProperties.containsKey("db.schema") ? "select cas from "
 					+ jdbcProperties.getProperty("db.schema")
 					+ ".document where document_id = ?"
@@ -874,8 +869,8 @@ public class DBAnnotationViewerMain exte
 			ps.setInt(1, Integer.parseInt(documentID));
 			rs = ps.executeQuery();
 			if (rs.next()) {
-				gzIS = new GZIPInputStream(new BufferedInputStream(rs
-						.getBinaryStream(1)));
+				gzIS = new GZIPInputStream(new BufferedInputStream(
+						rs.getBinaryStream(1)));
 				XmlCasDeserializer.deserialize(gzIS, cas, true);
 			} else {
 				throw new RuntimeException("No document with id = "
@@ -910,8 +905,8 @@ public class DBAnnotationViewerMain exte
 	 * copied from AnnotationViewerDialog.
 	 */
 	private File createTempDir() {
-		File temp = new File(System.getProperty("java.io.tmpdir"), System
-				.getProperty("user.name"));
+		File temp = new File(System.getProperty("java.io.tmpdir"),
+				System.getProperty("user.name"));
 		temp.mkdir();
 		return temp;
 	}

Added: ctakes/branches/ytex/ctakes-ytex-uima/src/main/java/org/apache/ctakes/ytex/uima/ApplicationContextHolder.java
URL: http://svn.apache.org/viewvc/ctakes/branches/ytex/ctakes-ytex-uima/src/main/java/org/apache/ctakes/ytex/uima/ApplicationContextHolder.java?rev=1555218&view=auto
==============================================================================
--- ctakes/branches/ytex/ctakes-ytex-uima/src/main/java/org/apache/ctakes/ytex/uima/ApplicationContextHolder.java (added)
+++ ctakes/branches/ytex/ctakes-ytex-uima/src/main/java/org/apache/ctakes/ytex/uima/ApplicationContextHolder.java Fri Jan  3 19:31:43 2014
@@ -0,0 +1,69 @@
+package org.apache.ctakes.ytex.uima;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Properties;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.springframework.beans.factory.BeanFactory;
+import org.springframework.beans.factory.access.BeanFactoryLocator;
+import org.springframework.context.ApplicationContext;
+import org.springframework.context.access.ContextSingletonBeanFactoryLocator;
+import org.springframework.context.support.ClassPathXmlApplicationContext;
+
+/**
+ * Holds the Spring application context used by YTEX. The context is located
+ * through a bean reference context, which defaults to
+ * classpath*:org/apache/ctakes/ytex/uima/beanRefContext.xml. This can be
+ * overridden with the key ytex.beanRefContext, either in ytex.properties or as
+ * a system property (system properties are applied on top of ytex.properties).
+ * 
+ * @author vijay
+ */
+public class ApplicationContextHolder {
+	private static final Log log = LogFactory
+			.getLog(ApplicationContextHolder.class);
+	/** classpath location of the YTEX configuration properties */
+	private static final String YTEX_PROPERTIES = "/org/apache/ctakes/ytex/ytex.properties";
+	private static Properties ytexProperties;
+	private static BeanFactoryLocator beanFactory;
+	private static ApplicationContext ytexApplicationContext;
+
+	static {
+		InputStream ytexPropsIn = null;
+		String beanRefContext = "classpath*:org/apache/ctakes/ytex/uima/beanRefContext.xml";
+		try {
+			log.info("loading ytex.properties from: "
+					+ ApplicationContextHolder.class
+							.getResource(YTEX_PROPERTIES));
+			ytexPropsIn = ApplicationContextHolder.class
+					.getResourceAsStream(YTEX_PROPERTIES);
+
+			ytexProperties = new Properties();
+			if (ytexPropsIn != null) {
+				ytexProperties.load(ytexPropsIn);
+			} else {
+				// Properties.load(null) would throw a NullPointerException and
+				// abort the whole initialization; continue with system
+				// properties and the default bean reference context instead.
+				log.warn(YTEX_PROPERTIES
+						+ " not found on classpath, using system properties and defaults");
+			}
+			ytexProperties.putAll(System.getProperties());
+			beanRefContext = ytexProperties.getProperty("ytex.beanRefContext",
+					beanRefContext);
+			if (log.isInfoEnabled())
+				log.info("beanRefContext=" + beanRefContext);
+			beanFactory = ContextSingletonBeanFactoryLocator
+					.getInstance(beanRefContext);
+		} catch (Exception e) {
+			// beanFactory stays null; getApplicationContext() reports this
+			log.error("initalizer", e);
+		} finally {
+			if (ytexPropsIn != null) {
+				try {
+					ytexPropsIn.close();
+				} catch (IOException ignored) {
+					// nothing useful can be done if closing a read-only
+					// classpath stream fails
+				}
+			}
+		}
+	}
+
+	/**
+	 * Get the application context registered under the name
+	 * "ytexApplicationContext" in the bean reference context.
+	 * 
+	 * @return the YTEX application context
+	 * @throws IllegalStateException
+	 *             if the static initialization of this class failed (the
+	 *             cause was logged at error level)
+	 */
+	public static ApplicationContext getApplicationContext() {
+		if (beanFactory == null) {
+			throw new IllegalStateException(
+					"ytex application context could not be initialized, see log for the cause");
+		}
+		return (ApplicationContext)beanFactory.useBeanFactory(
+				"ytexApplicationContext").getFactory();
+	}
+
+	/**
+	 * @return the properties loaded from ytex.properties, overlaid with the
+	 *         system properties
+	 */
+	public static Properties getYtexProperties() {
+		return ytexProperties;
+	}
+
+}

Added: ctakes/branches/ytex/ctakes-ytex-uima/src/main/java/org/apache/ctakes/ytex/uima/DBCollectionReader.java
URL: http://svn.apache.org/viewvc/ctakes/branches/ytex/ctakes-ytex-uima/src/main/java/org/apache/ctakes/ytex/uima/DBCollectionReader.java?rev=1555218&view=auto
==============================================================================
--- ctakes/branches/ytex/ctakes-ytex-uima/src/main/java/org/apache/ctakes/ytex/uima/DBCollectionReader.java (added)
+++ ctakes/branches/ytex/ctakes-ytex-uima/src/main/java/org/apache/ctakes/ytex/uima/DBCollectionReader.java Fri Jan  3 19:31:43 2014
@@ -0,0 +1,270 @@
+package org.apache.ctakes.ytex.uima;
+
+import java.io.IOException;
+import java.sql.Driver;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import javax.sql.DataSource;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.ctakes.ytex.uima.types.DocKey;
+import org.apache.ctakes.ytex.uima.types.KeyValuePair;
+import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.CASException;
+import org.apache.uima.collection.CollectionException;
+import org.apache.uima.collection.CollectionReader_ImplBase;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.cas.FSArray;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.resource.metadata.ConfigurationParameterSettings;
+import org.apache.uima.resource.metadata.ProcessingResourceMetaData;
+import org.apache.uima.util.Progress;
+import org.apache.uima.util.ProgressImpl;
+import org.springframework.jdbc.core.RowCallbackHandler;
+import org.springframework.jdbc.core.namedparam.NamedParameterJdbcTemplate;
+import org.springframework.jdbc.core.simple.SimpleJdbcTemplate;
+import org.springframework.jdbc.datasource.DataSourceTransactionManager;
+import org.springframework.jdbc.datasource.SimpleDriverDataSource;
+import org.springframework.jdbc.support.lob.DefaultLobHandler;
+import org.springframework.jdbc.support.lob.LobHandler;
+import org.springframework.transaction.TransactionStatus;
+import org.springframework.transaction.support.TransactionCallback;
+import org.springframework.transaction.support.TransactionTemplate;
+
+/**
+ * 
+ * Read documents from db. Config parameters:
+ * <ul>
+ * <li>queryGetDocumentKeys the query to get the document keys</li>
+ * <li>queryGetDocument the query to get a document given a key. should have
+ * named parameters that match the columns of the result set returned by
+ * queryGetDocumentKeys</li>
+ * <li>keyTypeName the uima type of the document key to be added to the cas.
+ * defaults to org.apache.ctakes.ytex.uima.types.DocKey.</li>
+ * <li>keyNameToLowerCase convert the column names returned by
+ * queryGetDocumentKeys to lower case, default true</li>
+ * </ul>
+ * 
+ * @TODO more doc
+ * @author vijay
+ * 
+ */
+public class DBCollectionReader extends CollectionReader_ImplBase {
+	private static final Log log = LogFactory.getLog(DBCollectionReader.class);
+
+	/**
+	 * the query to get the document keys set in config file
+	 */
+	protected String queryGetDocumentKeys;
+	/**
+	 * the query to get a document given a key. set in config file
+	 */
+	protected String queryGetDocument;
+	/**
+	 * the key type. if not set, will default to
+	 * org.apache.ctakes.ytex.uima.types.DocKey
+	 */
+	protected String keyTypeName = "org.apache.ctakes.ytex.uima.types.DocKey";
+
+	protected DataSource dataSource;
+	protected SimpleJdbcTemplate simpleJdbcTemplate;
+	protected NamedParameterJdbcTemplate namedJdbcTemplate;
+	protected TransactionTemplate txTemplate;
+	protected boolean keyNameToLowerCase = true;
+
+	public boolean isKeyNameToLowerCase() {
+		return keyNameToLowerCase;
+	}
+
+	public void setKeyNameToLowerCase(boolean keyNameToLowerCase) {
+		this.keyNameToLowerCase = keyNameToLowerCase;
+	}
+
+	List<Map<String, Object>> listDocumentIds;
+	int i = 0;
+
+	@Override
+	public void initialize() throws ResourceInitializationException {
+		initializePreLoad();
+		loadDocumentIds();
+	}
+
+	/**
+	 * Read the configuration parameters from the reader's metadata and set up
+	 * database access via {@link #initDB(String, String)}. The parameters
+	 * keyTypeName and keyNameToLowerCase are optional: when they are not
+	 * configured the field defaults are kept.
+	 * 
+	 * @throws ResourceInitializationException
+	 *             if the superclass or the database initialization fails
+	 */
+	protected void initializePreLoad() throws ResourceInitializationException {
+		super.initialize();
+		ProcessingResourceMetaData metaData = getProcessingResourceMetaData();
+		ConfigurationParameterSettings paramSettings = metaData
+				.getConfigurationParameterSettings();
+		this.queryGetDocumentKeys = (String) paramSettings
+				.getParameterValue("queryGetDocumentKeys");
+		this.queryGetDocument = (String) paramSettings
+				.getParameterValue("queryGetDocument");
+		String keyTypeNameParam = (String) paramSettings
+				.getParameterValue("keyTypeName");
+		// only override the DocKey default when a type name is actually
+		// configured; an unset parameter must not null out the default
+		if (keyTypeNameParam != null && keyTypeNameParam.length() > 0)
+			this.keyTypeName = keyTypeNameParam;
+		Boolean keyNameToLowerCase = (Boolean) paramSettings
+				.getParameterValue("keyNameToLowerCase");
+		if (keyNameToLowerCase != null)
+			this.keyNameToLowerCase = keyNameToLowerCase.booleanValue();
+		String dbURL = (String) paramSettings.getParameterValue("dbURL");
+		String dbDriver = (String) paramSettings.getParameterValue("dbDriver");
+		initDB(dbDriver, dbURL);
+	}
+
+	protected void initDB(String dbDriver, String dbURL)
+			throws ResourceInitializationException {
+		if (dbURL != null && dbURL.length() > 0) {
+			try {
+
+				if (dbDriver == null || dbDriver.length() == 0) {
+					dbDriver = ApplicationContextHolder.getYtexProperties()
+							.getProperty("db.driver");
+				}
+				dataSource = new SimpleDriverDataSource((Driver) Class.forName(
+						dbDriver).newInstance(), dbURL);
+				txTemplate = new TransactionTemplate(
+						new DataSourceTransactionManager(dataSource));
+			} catch (InstantiationException e) {
+				throw new ResourceInitializationException(e);
+			} catch (IllegalAccessException e) {
+				throw new ResourceInitializationException(e);
+			} catch (ClassNotFoundException e) {
+				throw new ResourceInitializationException(e);
+			}
+		} else {
+			txTemplate = (TransactionTemplate) ApplicationContextHolder
+					.getApplicationContext().getBean("txTemplate");
+			dataSource = (DataSource) ApplicationContextHolder
+					.getApplicationContext().getBean(
+							"collectionReaderDataSource");
+		}
+		simpleJdbcTemplate = new SimpleJdbcTemplate(dataSource);
+		namedJdbcTemplate = new NamedParameterJdbcTemplate(dataSource);
+	}
+
+	/**
+	 * Load the document keys, unless they are already loaded. Runs
+	 * queryGetDocumentKeys inside a transaction, stores the returned rows (one
+	 * map per row) in listDocumentIds and resets the cursor i to the first
+	 * document.
+	 */
+	protected void loadDocumentIds() {
+		if (listDocumentIds == null) {
+			listDocumentIds = txTemplate
+					.execute(new TransactionCallback<List<Map<String, Object>>>() {
+
+						@Override
+						public List<Map<String, Object>> doInTransaction(
+								TransactionStatus arg0) {
+							return simpleJdbcTemplate
+									.queryForList(queryGetDocumentKeys);
+						}
+					});
+			i = 0;
+		}
+	}
+
+	@Override
+	public void getNext(final CAS aCAS) throws IOException, CollectionException {
+		try {
+			getNext(aCAS.getJCas());
+		} catch (CASException e) {
+			throw new CollectionException(e);
+		}
+	}
+
+	public void getNext(final JCas aCAS) throws IOException,
+			CollectionException {
+		if (i < listDocumentIds.size()) {
+			final Map<String, Object> id = listDocumentIds.get(i++);
+			if (log.isInfoEnabled()) {
+				log.info("loading document with id = " + id);
+			}
+			getDocumentById(aCAS, id);
+			addDocKey(aCAS, id);
+		} else {
+			// shouldn't get here?
+			throw new CollectionException("no documents to process",
+					new Object[] {});
+		}
+	}
+
+	/**
+	 * Add a DocKey holding one KeyValuePair per entry of the document key to
+	 * the CAS indexes. Number values are stored as long, String values as
+	 * string, and any other non-null value is stored via toString() after
+	 * logging a warning. A null value leaves both value features of the pair
+	 * unset.
+	 * 
+	 * @param aCAS
+	 *            the cas to add the DocKey to
+	 * @param id
+	 *            the document key, as returned by queryGetDocumentKeys
+	 * @throws CollectionException
+	 *             declared for callers; not thrown by this implementation
+	 */
+	private void addDocKey(JCas aCAS, Map<String, Object> id)
+			throws CollectionException {
+		DocKey docKey = new DocKey(aCAS);
+		FSArray keyValuePairs = new FSArray(aCAS, id.size());
+		// deliberately not named i: that would shadow the document cursor field
+		int pairIndex = 0;
+		for (Map.Entry<String, Object> idVal : id.entrySet()) {
+			String key = idVal.getKey();
+			Object val = idVal.getValue();
+			KeyValuePair p = new KeyValuePair(aCAS);
+			p.setKey(key);
+			if (val == null) {
+				// a null key value would otherwise hit val.toString() below
+				// and abort the document with a NullPointerException
+				log.warn("key attribute is null, storing key without value, key="
+						+ key);
+			} else if (val instanceof Number) {
+				p.setValueLong(((Number) val).longValue());
+			} else if (val instanceof String) {
+				p.setValueString((String) val);
+			} else {
+				log.warn("Don't know how to handle key attribute, converting to string, key="
+						+ key + ", value=" + val);
+				p.setValueString(val.toString());
+			}
+			keyValuePairs.set(pairIndex, p);
+			pairIndex++;
+		}
+		docKey.setKeyValuePairs(keyValuePairs);
+		docKey.addToIndexes();
+
+	}
+
+	/**
+	 * Load the text of one document into the cas. Runs queryGetDocument inside
+	 * a transaction, using the entries of the document key as named
+	 * parameters, and sets the first column of the first returned row as the
+	 * document text. Any further rows are logged as an error and ignored.
+	 * 
+	 * @param aCAS
+	 *            the cas whose document text is set
+	 * @param id
+	 *            the document key; if keyNameToLowerCase is set, a copy with
+	 *            lower-cased key names is used for the query
+	 */
+	protected void getDocumentById(final JCas aCAS, final Map<String, Object> id) {
+		Map<String, Object> idMapTmp = id;
+		if (this.isKeyNameToLowerCase()) {
+			// copy instead of modifying the caller's map
+			idMapTmp = new HashMap<String, Object>();
+			for (Map.Entry<String, Object> e : id.entrySet()) {
+				idMapTmp.put(e.getKey().toLowerCase(), e.getValue());
+			}
+		}
+		// final reference so the anonymous callbacks below can use it
+		final Map<String, Object> idQuery = idMapTmp;
+		this.txTemplate.execute(new TransactionCallback<Object>() {
+
+			@Override
+			public Object doInTransaction(TransactionStatus arg0) {
+				namedJdbcTemplate.query(queryGetDocument, idQuery,
+						new RowCallbackHandler() {
+							// set once the first row has been consumed
+							boolean bFirstRowRead = false;
+
+							@Override
+							public void processRow(ResultSet rs)
+									throws SQLException {
+								if (!bFirstRowRead) {
+									LobHandler lobHandler = new DefaultLobHandler();
+									String clobText = lobHandler
+											.getClobAsString(rs, 1);
+									aCAS.setDocumentText(clobText);
+									bFirstRowRead = true;
+								} else {
+									log.error("Multiple documents for document key: "
+											+ idQuery);
+								}
+							}
+						});
+				return null;
+			}
+		});
+	}
+
+	@Override
+	public Progress[] getProgress() {
+		return new Progress[] { new ProgressImpl(i, listDocumentIds.size(),
+				Progress.ENTITIES) };
+	}
+
+	@Override
+	public boolean hasNext() throws IOException, CollectionException {
+		return i < listDocumentIds.size();
+	}
+
+	@Override
+	public void close() throws IOException {
+		this.listDocumentIds = null;
+		this.i = 0;
+	}
+
+}

Added: ctakes/branches/ytex/ctakes-ytex-uima/src/main/resources/org/apache/ctakes/ytex/types/TypeSystem.xml
URL: http://svn.apache.org/viewvc/ctakes/branches/ytex/ctakes-ytex-uima/src/main/resources/org/apache/ctakes/ytex/types/TypeSystem.xml?rev=1555218&view=auto
==============================================================================
--- ctakes/branches/ytex/ctakes-ytex-uima/src/main/resources/org/apache/ctakes/ytex/types/TypeSystem.xml (added)
+++ ctakes/branches/ytex/ctakes-ytex-uima/src/main/resources/org/apache/ctakes/ytex/types/TypeSystem.xml Fri Jan  3 19:31:43 2014
@@ -0,0 +1,59 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
+  <name>YTEXTypeSystem</name>
+  <description>YTEX Types</description>
+  <version>1.0</version>
+  <vendor/>
+  <imports>
+    <import name="org.apache.ctakes.typesystem.types.TypeSystem"/>
+  </imports>
+  <types>
+    <typeDescription>
+      <name>org.apache.ctakes.ytex.uima.types.KeyValuePair</name>
+      <description/>
+      <supertypeName>uima.cas.TOP</supertypeName>
+      <features>
+        <featureDescription>
+          <name>key</name>
+          <description/>
+          <rangeTypeName>uima.cas.String</rangeTypeName>
+        </featureDescription>
+        <featureDescription>
+          <name>valueLong</name>
+          <description/>
+          <rangeTypeName>uima.cas.Long</rangeTypeName>
+        </featureDescription>
+        <featureDescription>
+          <name>valueString</name>
+          <description/>
+          <rangeTypeName>uima.cas.String</rangeTypeName>
+        </featureDescription>
+      </features>
+    </typeDescription>
+    <typeDescription>
+      <name>org.apache.ctakes.ytex.uima.types.DocKey</name>
+      <description/>
+      <supertypeName>uima.tcas.Annotation</supertypeName>
+      <features>
+        <featureDescription>
+          <name>keyValuePairs</name>
+          <description/>
+          <rangeTypeName>uima.cas.FSArray</rangeTypeName>
+          <elementType>org.apache.ctakes.ytex.uima.types.KeyValuePair</elementType>
+        </featureDescription>
+      </features>
+    </typeDescription>
+    <typeDescription>
+      <name>org.apache.ctakes.ytex.uima.types.Date</name>
+      <description/>
+      <supertypeName>uima.tcas.Annotation</supertypeName>
+      <features>
+        <featureDescription>
+          <name>date</name>
+          <description>ISO 8601 Formatted Timestamp: yyyy-MM-dd'T'HH:mm:ssZ</description>
+          <rangeTypeName>uima.cas.String</rangeTypeName>
+        </featureDescription>
+      </features>
+    </typeDescription>
+  </types>
+</typeSystemDescription>

Added: ctakes/branches/ytex/ctakes-ytex-uima/src/test/java/org/apache/ctakes/ytex/uima/TestUtils.java
URL: http://svn.apache.org/viewvc/ctakes/branches/ytex/ctakes-ytex-uima/src/test/java/org/apache/ctakes/ytex/uima/TestUtils.java?rev=1555218&view=auto
==============================================================================
--- ctakes/branches/ytex/ctakes-ytex-uima/src/test/java/org/apache/ctakes/ytex/uima/TestUtils.java (added)
+++ ctakes/branches/ytex/ctakes-ytex-uima/src/test/java/org/apache/ctakes/ytex/uima/TestUtils.java Fri Jan  3 19:31:43 2014
@@ -0,0 +1,82 @@
+package org.apache.ctakes.ytex.uima;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Date;
+
+import org.apache.ctakes.ytex.dao.DBUtil;
+import org.apache.ctakes.ytex.uima.annotators.DBConsumer;
+import org.apache.uima.UIMAFramework;
+import org.apache.uima.analysis_engine.AnalysisEngine;
+import org.apache.uima.collection.CollectionReader;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.util.InvalidXMLException;
+import org.apache.uima.util.XMLInputSource;
+import org.apache.uima.util.XMLParser;
+import org.uimafit.factory.AggregateBuilder;
+import org.uimafit.factory.AnalysisEngineFactory;
+import org.uimafit.factory.CollectionReaderFactory;
+
+import com.google.common.base.Strings;
+
+public class TestUtils {
+	static final String queryGetDocumentKeys = "select note_id instance_id from %s%s";
+	static final String queryGetDocument = "select note_text from %s%s where note_id = :instance_id";
+
+	public static void addDescriptor(AggregateBuilder builder, String path)
+			throws IOException, InvalidXMLException {
+		File fileCtakes = new File(path);
+		XMLParser parser = UIMAFramework.getXMLParser();
+		XMLInputSource source = new XMLInputSource(fileCtakes);
+		builder.add(parser.parseAnalysisEngineDescription(source));
+	}
+
+	/**
+	 * Create a simple aggregate ae that does sentence splitting, tokenization,
+	 * and stores results in database. runs the following AEs: <li>
+	 * SegmentRegexAnnotator <li>SentenceDetectorAnnotator <li>
+	 * TokenizerAnnotator <li>DBConsumer
+	 * 
+	 * @param analysisBatch
+	 *            name of analysis batch for dbconsumer. If null will be set to
+	 *            test-[current time millis].
+	 * @return
+	 * @throws IOException
+	 * @throws InvalidXMLException
+	 * @throws ResourceInitializationException
+	 */
+	public static AnalysisEngine createTokenizerAE(String analysisBatch)
+			throws IOException, InvalidXMLException,
+			ResourceInitializationException {
+		String dbAnalysisBatch = analysisBatch;
+		if (Strings.isNullOrEmpty(dbAnalysisBatch))
+			dbAnalysisBatch = "test-" + System.currentTimeMillis();
+		AggregateBuilder builder = new AggregateBuilder();
+		addDescriptor(builder, "desc/analysis_engine/SegmentRegexAnnotator.xml");
+		addDescriptor(builder,
+				"desc/analysis_engine/SentenceDetectorAnnotator.xml");
+		addDescriptor(builder,
+				"../ctakes-core/desc/analysis_engine/TokenizerAnnotator.xml");
+		builder.add(AnalysisEngineFactory.createPrimitiveDescription(
+				DBConsumer.class, "analysisBatch", dbAnalysisBatch,
+				"storeDocText", false, "storeCAS", true));
+		AnalysisEngine engine = builder.createAggregate();
+		return engine;
+	}
+
+	public static CollectionReader getFractureDemoCollectionReader()
+			throws ResourceInitializationException {
+		CollectionReader colReader = CollectionReaderFactory
+				.createCollectionReader(
+						DBCollectionReader.class,
+						"queryGetDocumentKeys",
+						String.format(queryGetDocumentKeys,
+								DBUtil.getYTEXTablePrefix(),
+								DBUtil.formatTableName("fracture_demo")),
+						"queryGetDocument",
+						String.format(queryGetDocument,
+								DBUtil.getYTEXTablePrefix(),
+								DBUtil.formatTableName("fracture_demo")));
+		return colReader;
+	}
+}

Added: ctakes/branches/ytex/ctakes-ytex-uima/src/test/java/org/apache/ctakes/ytex/uima/annotators/DBCollectionReaderTest.java
URL: http://svn.apache.org/viewvc/ctakes/branches/ytex/ctakes-ytex-uima/src/test/java/org/apache/ctakes/ytex/uima/annotators/DBCollectionReaderTest.java?rev=1555218&view=auto
==============================================================================
--- ctakes/branches/ytex/ctakes-ytex-uima/src/test/java/org/apache/ctakes/ytex/uima/annotators/DBCollectionReaderTest.java (added)
+++ ctakes/branches/ytex/ctakes-ytex-uima/src/test/java/org/apache/ctakes/ytex/uima/annotators/DBCollectionReaderTest.java Fri Jan  3 19:31:43 2014
@@ -0,0 +1,37 @@
+package org.apache.ctakes.ytex.uima.annotators;
+
+import java.io.IOException;
+
+import org.apache.ctakes.ytex.uima.TestUtils;
+import org.apache.ctakes.ytex.uima.types.DocKey;
+import org.apache.uima.UIMAException;
+import org.apache.uima.cas.CASRuntimeException;
+import org.apache.uima.cas.admin.CASAdminException;
+import org.apache.uima.collection.CollectionReader;
+import org.apache.uima.collection.metadata.CpeDescriptorException;
+import org.apache.uima.jcas.JCas;
+import org.junit.Assert;
+import org.junit.Test;
+import org.uimafit.factory.JCasFactory;
+import org.xml.sax.SAXException;
+
+public class DBCollectionReaderTest {
+
+	@Test
+	public void test() throws IOException,
+			SAXException, CpeDescriptorException, UIMAException, CASRuntimeException, CASAdminException {
+		CollectionReader colReader = TestUtils.getFractureDemoCollectionReader();
+		int count = 0;
+		JCas jcas = JCasFactory.createJCasFromPath("src/main/resources/org/apache/ctakes/ytex/types/TypeSystem.xml");
+		while(colReader.hasNext()) {
+			count++;
+			colReader.getNext(jcas.getCas());
+			Assert.assertTrue("document should have a dockey", jcas.getAnnotationIndex(DocKey.type).iterator().hasNext());
+			jcas.reset();
+		}
+		Assert.assertTrue("should have read some documents", count > 0);
+	}
+
+
+
+}

Added: ctakes/branches/ytex/ctakes-ytex-uima/src/test/java/org/apache/ctakes/ytex/uima/annotators/SparseDataExporterTest.java
URL: http://svn.apache.org/viewvc/ctakes/branches/ytex/ctakes-ytex-uima/src/test/java/org/apache/ctakes/ytex/uima/annotators/SparseDataExporterTest.java?rev=1555218&view=auto
==============================================================================
--- ctakes/branches/ytex/ctakes-ytex-uima/src/test/java/org/apache/ctakes/ytex/uima/annotators/SparseDataExporterTest.java (added)
+++ ctakes/branches/ytex/ctakes-ytex-uima/src/test/java/org/apache/ctakes/ytex/uima/annotators/SparseDataExporterTest.java Fri Jan  3 19:31:43 2014
@@ -0,0 +1,125 @@
+package org.apache.ctakes.ytex.uima.annotators;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.util.Properties;
+
+import org.apache.ctakes.ytex.dao.DBUtil;
+import org.apache.ctakes.ytex.kernel.SparseDataExporter;
+import org.apache.ctakes.ytex.uima.ApplicationContextHolder;
+import org.apache.ctakes.ytex.uima.TestUtils;
+import org.apache.uima.UIMAException;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.util.InvalidXMLException;
+import org.junit.Assert;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.uimafit.pipeline.SimplePipeline;
+
+public class SparseDataExporterTest {
+	static final String instanceClassQuery = "select note_id,  fracture, case when note_set = 'train' then 1 else 0 end train from %sfracture_demo";
+	static final String numericWordQuery = "select f.note_id, coveredText, COUNT(*) "
+			+ "from %1$sanno_token w "
+			+ "inner join %1$sanno_base da on w.anno_base_id = da.anno_base_id "
+			+ "inner join %1$sdocument d on d.document_id = da.document_id "
+			+ "inner join %1$sfracture_demo f on f.note_id = d.instance_id "
+			+ "where coveredText is not null "
+			+ "and d.analysis_batch = '%2$s' "
+			+ "group by f.note_id, coveredText";
+	static String analysisBatch;
+	static SparseDataExporter exporter;
+
+	/**
+	 * set the analysis batch, run the pipeline on the fracture demo
+	 * 
+	 * @throws ResourceInitializationException
+	 * @throws InvalidXMLException
+	 * @throws UIMAException
+	 * @throws IOException
+	 */
+	@BeforeClass
+	public static void setup() throws ResourceInitializationException,
+			InvalidXMLException, UIMAException, IOException {
+		analysisBatch = "SparseDataExporterTest-" + System.currentTimeMillis();
+		SimplePipeline.runPipeline(TestUtils.getFractureDemoCollectionReader(),
+				TestUtils.createTokenizerAE(analysisBatch));
+		exporter = ApplicationContextHolder.getApplicationContext().getBean(
+				SparseDataExporter.class);
+	}
+
+	@Test
+	public void testWeka() throws ResourceInitializationException,
+			InvalidXMLException, UIMAException, IOException {
+		File propFile = setupExportProps("weka");
+		exporter.exportData(propFile.getAbsolutePath(), "weka");
+		File trainArff = new File(propFile.getParent() + "/train.arff");
+		Assert.assertTrue("train.arff should exist", trainArff.exists());
+		Assert.assertTrue("train.arff should have a non-trivial size",
+				trainArff.length() > 2000);
+	}
+
+	@Test
+	public void testLibsvm() throws ResourceInitializationException,
+			InvalidXMLException, UIMAException, IOException {
+		File propFile = setupExportProps("libsvm");
+		exporter.exportData(propFile.getAbsolutePath(), "libsvm");
+		File train_data = new File(propFile.getParent() + "/train_data.txt");
+		assertOutputFileGood(train_data);
+	}
+
+	private void assertOutputFileGood(File train_data) {
+		Assert.assertTrue(train_data.getName() + " should exist",
+				train_data.exists());
+		Assert.assertTrue(train_data.getName()
+				+ " should have a non-trivial size", train_data.length() > 2000);
+	}
+
+	@Test
+	public void testSparsematrix() throws ResourceInitializationException,
+			InvalidXMLException, UIMAException, IOException {
+		File propFile = setupExportProps("sparsematrix");
+		exporter.exportData(propFile.getAbsolutePath(), "sparsematrix");
+		assertOutputFileGood(new File(propFile.getParent() + "/data.txt"));
+	}
+
+	/**
+	 * Write the export.xml properties file for one export format. The file is
+	 * created in the directory [base]/[analysisBatch]/[subdir], which is also
+	 * set as the export's outdir. [base] is ./target if that directory exists,
+	 * otherwise java.io.tmpdir.
+	 * 
+	 * @param subdir
+	 *            name of the sub-directory for this export
+	 * @return file for export.xml
+	 * @throws FileNotFoundException
+	 *             if export.xml cannot be opened for writing
+	 * @throws IOException
+	 *             if writing export.xml fails
+	 */
+	private File setupExportProps(String subdir) throws FileNotFoundException,
+			IOException {
+		File baseOutputDir = new File("./target");
+		if (!baseOutputDir.exists() || !baseOutputDir.isDirectory()) {
+			baseOutputDir = new File(System.getProperty("java.io.tmpdir"));
+		}
+		File tempDir = new File(baseOutputDir.getAbsolutePath() + "/"
+				+ analysisBatch + "/" + subdir);
+		tempDir.mkdirs();
+		System.out.println("temp dir: " + tempDir);
+		Properties props = new Properties();
+		props.setProperty("arffRelation", "fracture-word");
+		props.setProperty("instanceClassQuery",
+				String.format(instanceClassQuery, DBUtil.getYTEXTablePrefix()));
+		props.setProperty("numericWordQuery", String.format(numericWordQuery,
+				DBUtil.getYTEXTablePrefix(), analysisBatch));
+		props.setProperty("outdir", tempDir.getAbsolutePath());
+		File propFile = new File(tempDir, "export.xml");
+		// open outside the try block: if opening fails there is nothing to
+		// close, and the original FileNotFoundException reaches the caller
+		// instead of a NullPointerException from the finally block
+		OutputStream fos = new FileOutputStream(propFile);
+		try {
+			props.storeToXML(fos, null);
+		} finally {
+			fos.close();
+		}
+		return propFile;
+	}
+}

Added: ctakes/branches/ytex/ctakes-ytex-uima/src/test/resources/log4j.properties
URL: http://svn.apache.org/viewvc/ctakes/branches/ytex/ctakes-ytex-uima/src/test/resources/log4j.properties?rev=1555218&view=auto
==============================================================================
--- ctakes/branches/ytex/ctakes-ytex-uima/src/test/resources/log4j.properties (added)
+++ ctakes/branches/ytex/ctakes-ytex-uima/src/test/resources/log4j.properties Fri Jan  3 19:31:43 2014
@@ -0,0 +1,44 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+###############################################################################
+
+log4j.rootLogger=INFO, Console
+
+###############################################################################
+
+# Rolling File Appender definition
+log4j.appender.RFile=org.apache.log4j.RollingFileAppender
+log4j.appender.RFile.File=textAnalysis.log
+log4j.appender.RFile.MaxFileSize=1024KB
+log4j.appender.RFile.MaxBackupIndex=1
+log4j.appender.RFile.layout=org.apache.log4j.PatternLayout
+log4j.appender.RFile.layout.ConversionPattern=%d %-5p %c %x - %m%n
+
+###############################################################################
+
+# Console Appender definition
+log4j.appender.Console=org.apache.log4j.ConsoleAppender
+log4j.appender.Console.layout=org.apache.log4j.PatternLayout
+log4j.appender.Console.layout.ConversionPattern=%d %-5p %c %x - %m%n
+
+###############################################################################
+log4j.category.org.hibernate=INFO
+log4j.category.org.springframework=INFO
+log4j.category.org.apache.ctakes.ytex=TRACE
\ No newline at end of file

Modified: ctakes/branches/ytex/ctakes-ytex/README
URL: http://svn.apache.org/viewvc/ctakes/branches/ytex/ctakes-ytex/README?rev=1555218&r1=1555217&r2=1555218&view=diff
==============================================================================
--- ctakes/branches/ytex/ctakes-ytex/README (original)
+++ ctakes/branches/ytex/ctakes-ytex/README Fri Jan  3 19:31:43 2014
@@ -53,3 +53,15 @@ Override this by:
  
 
 
+############
+Creating the ctakes-ytex-resources-3.1.2-SNAPSHOT.zip 
+############
+* Create the v_snomed_fword_lookup.txt file
+Install UMLS in MySQL and run the ytex db setup; this creates the v_snomed_fword_lookup table.
+Export the file:
+  - set permissions: grant FILE on *.* to 'ytex'@'localhost'
+  - export: mysqldump --user=ytex --password=ytex --tab=c:\temp ytex v_snomed_fword_lookup
+  - replace \N with empty string in c:\temp\v_snomed_fword_lookup.txt
+* Create the concept graph
+
+

Modified: ctakes/branches/ytex/ctakes-ytex/scripts/build-setup.xml
URL: http://svn.apache.org/viewvc/ctakes/branches/ytex/ctakes-ytex/scripts/build-setup.xml?rev=1555218&r1=1555217&r2=1555218&view=diff
==============================================================================
--- ctakes/branches/ytex/ctakes-ytex/scripts/build-setup.xml (original)
+++ ctakes/branches/ytex/ctakes-ytex/scripts/build-setup.xml Fri Jan  3 19:31:43 2014
@@ -7,14 +7,8 @@ Main target is 'all', which does the fol
   * templateToConfig generates configuration files from templates. 
   * setupDatabase call data/build.xml to setup the database.
 
-You *must* define the ytex.home variable for this script using the -D option:
+You *must* define the CTAKES_HOME environment variable, or set ctakes.home via the -D option:
 ant -Dctakes.home=c:\java\ctakes -file build-setup.xml
-
-To use this in a 'real' environment, you will need an installation of UMLS,
-or you can download a UMLS database dump we've provided.
-
-To set up a development environment using a preexisting database, 
-execute setup.lvg and templateToConfig
 	]]>
 	</description>
 	<!-- override these variables using eclipse settings/command line -->
@@ -113,7 +107,7 @@ Call with option -projecthelp for more i
 			</else>
 		</if>
 	</target>
-	<target name="deleteTestDb">
+	<target name="deleteTestDb" description="delete the test hsql database">
 		<delete dir="${test.hsqldb.dir}" quiet="true" />
 	</target>
 	<target name="templateToConfig" depends="templateToConfig.init,templateToConfig.updateConfig" description="generate config files from templates using values specified in ytex.properties">

Modified: ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/dao/ConceptDaoImpl.java
URL: http://svn.apache.org/viewvc/ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/dao/ConceptDaoImpl.java?rev=1555218&r1=1555217&r2=1555218&view=diff
==============================================================================
--- ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/dao/ConceptDaoImpl.java (original)
+++ ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/dao/ConceptDaoImpl.java Fri Jan  3 19:31:43 2014
@@ -116,30 +116,40 @@ public class ConceptDaoImpl implements C
 			CommandLineParser parser = new GnuParser();
 			CommandLine line = parser.parse(options, args);
 			String name = line.getOptionValue("name");
-			URL url = ConceptDaoImpl.class.getClassLoader().getResource(
-					CONCEPT_GRAPH_PATH + "/" + name + ".xml");
+			String propRes = CONCEPT_GRAPH_PATH + name + ".xml";
+			URL url = ConceptDaoImpl.class.getClassLoader()
+					.getResource(propRes);
+			if (url == null) {
+				System.out.println("properties file could not be located: "
+						+ propRes);
+				return;
+			}
+			// load properties
+			Properties props = new Properties();
+			InputStream is = ConceptDaoImpl.class.getClassLoader()
+					.getResourceAsStream(propRes);
+			try {
+				props.loadFromXML(is);
+			} finally {
+				is.close();
+			}
+			// determine directory for concept graph - attempt to put in same
+			// dir as props
 			File fDir = null;
-			if (url != null) {
-				if ("file".equals(url.getProtocol())) {
-					File f;
-					try {
-						f = new File(url.toURI());
-					} catch (URISyntaxException e) {
-						f = new File(url.getPath());
-					}
-					fDir = f.getParentFile();
+			if ("file".equals(url.getProtocol())) {
+				File f;
+				try {
+					f = new File(url.toURI());
+				} catch (URISyntaxException e) {
+					f = new File(url.getPath());
 				}
+				fDir = f.getParentFile();
 			}
-			Properties props = FileUtil.loadProperties(
-					line.getOptionValue("prop"), true);
-			String conceptGraphName = props
-					.getProperty("org.apache.ctakes.ytex.conceptGraphName");
 			String conceptGraphQuery = props
-					.getProperty("org.apache.ctakes.ytex.conceptGraphQuery");
-			String strCheckCycle = props.getProperty(
-					"org.apache.ctakes.ytex.checkCycle", "true");
+					.getProperty("ytex.conceptGraphQuery");
+			String strCheckCycle = props.getProperty("ytex.checkCycle", "true");
 			String forbiddenConceptList = props
-					.getProperty("org.apache.ctakes.ytex.forbiddenConcepts");
+					.getProperty("ytex.forbiddenConcepts");
 			Set<String> forbiddenConcepts;
 			if (forbiddenConceptList != null) {
 				forbiddenConcepts = new HashSet<String>();
@@ -152,13 +162,15 @@ public class ConceptDaoImpl implements C
 			if ("false".equalsIgnoreCase(strCheckCycle)
 					|| "no".equalsIgnoreCase(strCheckCycle))
 				checkCycle = false;
-			if (conceptGraphName != null && conceptGraphQuery != null) {
+			if (!Strings.isNullOrEmpty(name)
+					&& !Strings.isNullOrEmpty(conceptGraphQuery)) {
 				KernelContextHolder
 						.getApplicationContext()
 						.getBean(ConceptDao.class)
-						.createConceptGraph(fDir.getAbsolutePath(),
-								conceptGraphName, conceptGraphQuery,
-								checkCycle, forbiddenConcepts);
+						.createConceptGraph(
+								fDir != null ? fDir.getAbsolutePath() : null,
+								name, conceptGraphQuery, checkCycle,
+								forbiddenConcepts);
 			} else {
 				printHelp(options);
 			}
@@ -259,6 +271,14 @@ public class ConceptDaoImpl implements C
 			if (log.isWarnEnabled())
 				log.warn("createConceptGraph(): concept graph already exists, will not create a new one.  Delete existing concept graph if you want to recreate it.");
 		} else {
+			String outputDir = dir;
+			if (Strings.isNullOrEmpty(outputDir)) {
+				outputDir = getDefaultConceptGraphDir();
+			}
+			if (Strings.isNullOrEmpty(outputDir)) {
+				throw new IllegalArgumentException(
+						"could not determine default concept graph directory; please set property org.apache.ctakes.ytex.conceptGraphDir");
+			}
 			if (log.isInfoEnabled())
 				log.info("createConceptGraph(): file not found, creating concept graph from database.");
 			final ConceptGraph cg = new ConceptGraph();
@@ -300,14 +320,12 @@ public class ConceptDaoImpl implements C
 			cg.setRoot(rootId);
 			// can't get the maximum depth unless we're sure there are no
 			// cycles
-			// if (checkCycle)
-			// cg.setDepthMax(calculateDepthMax(rootId, cg.getConceptMap()));
 			if (checkCycle) {
 				log.info("computing intrinsic info for concept graph: " + name);
 				this.intrinsicInfoContentEvaluator
-						.evaluateIntrinsicInfoContent(name, dir, cg);
+						.evaluateIntrinsicInfoContent(name, outputDir, cg);
 			}
-			writeConceptGraph(dir, name, cg);
+			writeConceptGraph(outputDir, name, cg);
 		}
 	}
 
@@ -322,8 +340,9 @@ public class ConceptDaoImpl implements C
 		ConceptGraph cg = this.readConceptGraph(name);
 		if (cg != null) {
 			this.initializeConceptGraph(cg);
-			if(log.isInfoEnabled()) {
-				log.info(String.format("concept graph %s, vertices: %s", name, cg.getConceptList().size()));
+			if (log.isInfoEnabled()) {
+				log.info(String.format("concept graph %s, vertices: %s", name,
+						cg.getConceptList().size()));
 			}
 		}
 		return cg;
@@ -554,13 +573,6 @@ public class ConceptDaoImpl implements C
 	private void writeConceptGraph(String dir, String name, ConceptGraph cg) {
 		ObjectOutputStream os = null;
 		String outputDir = dir;
-		if (Strings.isNullOrEmpty(outputDir)) {
-			outputDir = getDefaultConceptGraphDir();
-		}
-		if (Strings.isNullOrEmpty(outputDir)) {
-			throw new IllegalArgumentException(
-					"could not determine default concept graph directory; please set property org.apache.ctakes.ytex.conceptGraphDir");
-		}
 		File cgFile = new File(outputDir + "/" + name + ".gz");
 		log.info("writing concept graph: " + cgFile.getAbsolutePath());
 		if (!cgFile.getParentFile().exists())

Modified: ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/ConceptSimilarityServiceImpl.java
URL: http://svn.apache.org/viewvc/ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/ConceptSimilarityServiceImpl.java?rev=1555218&r1=1555217&r2=1555218&view=diff
==============================================================================
--- ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/ConceptSimilarityServiceImpl.java (original)
+++ ctakes/branches/ytex/ctakes-ytex/src/main/java/org/apache/ctakes/ytex/kernel/metric/ConceptSimilarityServiceImpl.java Fri Jan  3 19:31:43 2014
@@ -52,7 +52,6 @@ import org.springframework.transaction.s
 
 import com.google.common.collect.ImmutableMap;
 
-
 /**
  * compute concept similarity
  * 
@@ -685,24 +684,31 @@ public class ConceptSimilarityServiceImp
 
 	public void init() {
 		log.info("begin initialization for concept graph: " + conceptGraphName);
-		TransactionTemplate t = new TransactionTemplate(this.transactionManager);
-		t.setPropagationBehavior(TransactionTemplate.PROPAGATION_REQUIRES_NEW);
-		t.execute(new TransactionCallback<Object>() {
-			@Override
-			public Object doInTransaction(TransactionStatus arg0) {
-				cg = conceptDao.getConceptGraph(conceptGraphName);
-				if (cg == null) {
-					log.warn("concept graph null, name: " + conceptGraphName);
-					return null;
-				}
-				if (isPreload()) {
-					initInfoContent();
-					initCuiTuiMapFromCorpus();
+		cg = conceptDao.getConceptGraph(conceptGraphName);
+		if (cg == null) {
+			log.warn("concept graph null, name: " + conceptGraphName);
+		} else {
+			initSimilarityMetricMap();
+			if (isPreload()) {
+				try {
+					TransactionTemplate t = new TransactionTemplate(
+							this.transactionManager);
+					t.setPropagationBehavior(TransactionTemplate.PROPAGATION_REQUIRES_NEW);
+					t.execute(new TransactionCallback<Object>() {
+						@Override
+						public Object doInTransaction(TransactionStatus arg0) {
+							initInfoContent();
+							initCuiTuiMapFromCorpus();
+							return null;
+						}
+					});
+				} catch (Exception e) {
+					log.info("could not initialize cui-tui map: "
+							+ e.getMessage()
+							+ ".  This is expected if you do not have umls installed in your db.");
 				}
-				initSimilarityMetricMap();
-				return null;
 			}
-		});
+		}
 		log.info("end initialization for concept graph: " + conceptGraphName);
 	}
 
@@ -784,8 +790,8 @@ public class ConceptSimilarityServiceImp
 		// }
 		// fill corpusIC
 		log.info("loading corpus infocontent for corpusName=" + corpusName
-					+ ", conceptGraphName=" + conceptGraphName
-					+ ", conceptSetName=" + conceptSetName);
+				+ ", conceptGraphName=" + conceptGraphName
+				+ ", conceptSetName=" + conceptSetName);
 		Map<String, Double> corpusICMap = classifierEvaluationDao
 				.getInfoContent(corpusName, conceptGraphName,
 						this.conceptSetName);
@@ -877,7 +883,8 @@ public class ConceptSimilarityServiceImp
 	// /*
 	// * (non-Javadoc)
 	// *
-	// * @see org.apache.ctakes.ytex.kernel.ConceptSimilarity#lch(java.lang.String,
+	// * @see
+	// org.apache.ctakes.ytex.kernel.ConceptSimilarity#lch(java.lang.String,
 	// * java.lang.String)
 	// */
 	// public double lch(String concept1, String concept2) {



Mime
View raw message