ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From brittfi...@apache.org
Subject svn commit: r1500502 - in /ctakes/sandbox/ctakes-scrubber-deid/desc: ./ ae/ consumer/ cpe/ reader/ type/
Date Sun, 07 Jul 2013 18:57:07 GMT
Author: brittfitch
Date: Sun Jul  7 18:57:06 2013
New Revision: 1500502

URL: http://svn.apache.org/r1500502
Log:
CTAKES-64
initial check in of scrubber de-identification module. 
contributed by Britt Fitch & Andy McMurry on behalf of Harvard Medical School.

checking in files in chunks. 
adding UIMA config files.

Added:
    ctakes/sandbox/ctakes-scrubber-deid/desc/
    ctakes/sandbox/ctakes-scrubber-deid/desc/ae/
    ctakes/sandbox/ctakes-scrubber-deid/desc/ae/DictionaryAnnotator.xml   (with props)
    ctakes/sandbox/ctakes-scrubber-deid/desc/ae/RegexAnnotator.xml   (with props)
    ctakes/sandbox/ctakes-scrubber-deid/desc/ae/TFAnnotator.xml   (with props)
    ctakes/sandbox/ctakes-scrubber-deid/desc/consumer/
    ctakes/sandbox/ctakes-scrubber-deid/desc/consumer/consumer_jdbc_pubs.xml   (with props)
    ctakes/sandbox/ctakes-scrubber-deid/desc/consumer/consumer_jdbc_test.xml   (with props)
    ctakes/sandbox/ctakes-scrubber-deid/desc/consumer/consumer_jdbc_train.xml   (with props)
    ctakes/sandbox/ctakes-scrubber-deid/desc/consumer/consumer_printer_debug.xml   (with props)
    ctakes/sandbox/ctakes-scrubber-deid/desc/cpe/
    ctakes/sandbox/ctakes-scrubber-deid/desc/cpe/cpe_PUBS.xml   (with props)
    ctakes/sandbox/ctakes-scrubber-deid/desc/cpe/cpe_cases_test.xml   (with props)
    ctakes/sandbox/ctakes-scrubber-deid/desc/cpe/cpe_cases_train.xml   (with props)
    ctakes/sandbox/ctakes-scrubber-deid/desc/reader/
    ctakes/sandbox/ctakes-scrubber-deid/desc/reader/reader_files_pubs.xml   (with props)
    ctakes/sandbox/ctakes-scrubber-deid/desc/reader/reader_files_test.xml   (with props)
    ctakes/sandbox/ctakes-scrubber-deid/desc/reader/reader_files_train.xml   (with props)
    ctakes/sandbox/ctakes-scrubber-deid/desc/type/
    ctakes/sandbox/ctakes-scrubber-deid/desc/type/CalculationTypeSystem.xml   (with props)
    ctakes/sandbox/ctakes-scrubber-deid/desc/type/KnownPHITypeSystem.xml   (with props)
    ctakes/sandbox/ctakes-scrubber-deid/desc/type/OntologyMatchTypeSystem.xml   (with props)

Added: ctakes/sandbox/ctakes-scrubber-deid/desc/ae/DictionaryAnnotator.xml
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-scrubber-deid/desc/ae/DictionaryAnnotator.xml?rev=1500502&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-scrubber-deid/desc/ae/DictionaryAnnotator.xml (added)
+++ ctakes/sandbox/ctakes-scrubber-deid/desc/ae/DictionaryAnnotator.xml Sun Jul  7 18:57:06 2013
@@ -0,0 +1,84 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+  
+    http://www.apache.org/licenses/LICENSE-2.0
+  
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License.
+-->
+<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
+  <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+  <primitive>true</primitive>
+  <annotatorImplementationName>org.spin.scrubber.uima.annotator.DictionaryAnnotator</annotatorImplementationName>
+  <analysisEngineMetaData>
+    <name>DictionaryAnnotator</name>
+    <description>matches dictionary</description>
+    <configurationParameters>
+      <configurationParameter>
+        <name>lookupQuery</name>
+        <description>currently query table matches structure of umls.mrconso. only requirement is that query returns 'code' and 'ontology' from a string match.</description>
+        <type>String</type>
+        <multiValued>true</multiValued>
+        <mandatory>true</mandatory>
+      </configurationParameter>
+    </configurationParameters>
+    <configurationParameterSettings>
+      <nameValuePair>
+        <name>lookupQuery</name>
+        <value>
+          <array>
+	          <string>select cui as code, sab as ontology from lookup_umls where str = ?;</string>
+	          <string>select code, ontology from lookup_dictionary where str = ?;</string>
+          </array>
+        </value>
+      </nameValuePair>
+    </configurationParameterSettings>
+    <typeSystemDescription>
+      <imports>
+        <import location="../type/OntologyMatchTypeSystem.xml"/>
+      </imports>
+    </typeSystemDescription>
+    <typePriorities/>
+    <fsIndexCollection>
+      <fsIndexes>
+        <fsIndexDescription>
+          <label>OntologyMatchIndex</label>
+          <typeName>org.spin.scrubber.uima.type.OntologyMatch</typeName>
+          <kind>sorted</kind>
+          <keys>
+            <fsIndexKey>
+              <featureName>begin</featureName>
+              <comparator>standard</comparator>
+            </fsIndexKey>
+          </keys>
+        </fsIndexDescription>
+      </fsIndexes>
+    </fsIndexCollection>
+    <capabilities>
+      <capability>
+        <inputs/>
+        <outputs>
+          <type allAnnotatorFeatures="true">org.spin.scrubber.uima.type.OntologyMatch</type>
+        </outputs>
+        <languagesSupported/>
+      </capability>
+    </capabilities>
+    <operationalProperties>
+      <modifiesCas>true</modifiesCas>
+      <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
+      <outputsNewCASes>false</outputsNewCASes>
+    </operationalProperties>
+  </analysisEngineMetaData>
+  <resourceManagerConfiguration/>
+</analysisEngineDescription>

Propchange: ctakes/sandbox/ctakes-scrubber-deid/desc/ae/DictionaryAnnotator.xml
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: ctakes/sandbox/ctakes-scrubber-deid/desc/ae/RegexAnnotator.xml
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-scrubber-deid/desc/ae/RegexAnnotator.xml?rev=1500502&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-scrubber-deid/desc/ae/RegexAnnotator.xml (added)
+++ ctakes/sandbox/ctakes-scrubber-deid/desc/ae/RegexAnnotator.xml Sun Jul  7 18:57:06 2013
@@ -0,0 +1,159 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+  
+    http://www.apache.org/licenses/LICENSE-2.0
+  
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License.
+-->
+<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
+  <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+  <primitive>true</primitive>
+  <annotatorImplementationName>org.spin.scrubber.uima.annotator.RegexAnnotator</annotatorImplementationName>
+  <analysisEngineMetaData>
+    <name>Regex Annotator</name>
+    <description>Matches regular expressions in document text.</description>
+    <version/>
+    <vendor/>
+    <configurationParameters>
+      <configurationParameter>
+        <name>Filenames</name>
+        <description>list of external resource dependency keys that need to be initialized for this annotator</description>
+        <type>String</type>
+        <multiValued>true</multiValued>
+        <mandatory>true</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>CaseSensitiveFile</name>
+        <description>boolean flag to determine if files should be interpreted as case sensitive or not.</description>
+        <type>Boolean</type>
+        <multiValued>true</multiValued>
+        <mandatory>true</mandatory>
+      </configurationParameter>
+    </configurationParameters>
+    <configurationParameterSettings>
+      <nameValuePair>
+        <name>Filenames</name>
+        <value>
+          <array>
+            <string>PatternFile</string>
+            <string>HospitalNameFile</string>
+            <string>PrivateFile</string>
+          </array>
+        </value>
+      </nameValuePair>
+      <nameValuePair>
+        <name>CaseSensitiveFile</name>
+        <value>
+          <array>
+            <boolean>true</boolean>
+            <boolean>false</boolean>
+            <boolean>false</boolean>
+          </array>
+        </value>
+      </nameValuePair>
+    </configurationParameterSettings>
+    <typeSystemDescription>
+      <imports>
+        <import location="../type/OntologyMatchTypeSystem.xml"/>
+      </imports>
+    </typeSystemDescription>
+    <capabilities>
+      <capability>
+        <inputs/>
+        <outputs>
+          <type>uima.tcas.Annotation</type>
+          <type>org.spin.scrubber.uima.type.OntologyMatch</type>
+        </outputs>
+        <languagesSupported/>
+      </capability>
+    </capabilities>
+    <operationalProperties>
+      <modifiesCas>true</modifiesCas>
+      <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
+      <outputsNewCASes>false</outputsNewCASes>
+    </operationalProperties>
+  </analysisEngineMetaData>
+  <externalResourceDependencies>
+    <externalResourceDependency>
+      <key>PatternFile</key>
+      <description>A required external file containing regular expressions to match. 
+	      File format is as follows: 
+			  - Lines starting with // or whitespace are ignored
+			  - Lines starting with # are the regex name 
+			  - Lines starting with % indicate an annotation type. 
+			  - All other lines are regular expressions.</description>
+      <optional>false</optional>
+    </externalResourceDependency>
+    <externalResourceDependency>
+      <key>HospitalNameFile</key>
+      <description>An optional external file containing names to match. 
+	      File format is as follows: 
+			  - Lines starting with // or whitespace are ignored
+			  - Lines starting with # are the name 
+			  - Lines starting with % indicate an annotation type. 
+			  - All other lines consist of strings to match.</description>
+      <optional>true</optional>
+    </externalResourceDependency>
+    <externalResourceDependency>
+      <key>PrivateFile</key>
+      <description>An optional external file containing names to match. 
+	      File format is as follows: 
+			  - Lines starting with // or whitespace are ignored
+			  - Lines starting with # are the name 
+			  - Lines starting with % indicate an annotation type. 
+			  - All other lines consist of strings to match.</description>
+      <optional>true</optional>
+    </externalResourceDependency>
+  </externalResourceDependencies>
+  <resourceManagerConfiguration>
+    <externalResources>
+      <externalResource>
+        <name>regex</name>
+        <description/>
+        <fileResourceSpecifier>
+          <fileUrl>file:conf/regex_patterns.txt</fileUrl>
+        </fileResourceSpecifier>
+      </externalResource>
+      <externalResource>
+        <name>hospital</name>
+        <description/>
+        <fileResourceSpecifier>
+          <fileUrl>file:conf/hospital_names.txt</fileUrl>
+        </fileResourceSpecifier>
+      </externalResource>
+      <externalResource>
+        <name>private</name>
+        <description/>
+        <fileResourceSpecifier>
+          <fileUrl>file:conf/private_dict.txt</fileUrl>
+        </fileResourceSpecifier>
+      </externalResource>
+    </externalResources>
+    <externalResourceBindings>
+      <externalResourceBinding>
+        <key>PatternFile</key>
+        <resourceName>regex</resourceName>
+      </externalResourceBinding>
+      <externalResourceBinding>
+        <key>HospitalNameFile</key>
+        <resourceName>hospital</resourceName>
+      </externalResourceBinding>
+      <externalResourceBinding>
+        <key>PrivateFile</key>
+        <resourceName>private</resourceName>
+      </externalResourceBinding>
+    </externalResourceBindings>
+  </resourceManagerConfiguration>
+</analysisEngineDescription>

Propchange: ctakes/sandbox/ctakes-scrubber-deid/desc/ae/RegexAnnotator.xml
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: ctakes/sandbox/ctakes-scrubber-deid/desc/ae/TFAnnotator.xml
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-scrubber-deid/desc/ae/TFAnnotator.xml?rev=1500502&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-scrubber-deid/desc/ae/TFAnnotator.xml (added)
+++ ctakes/sandbox/ctakes-scrubber-deid/desc/ae/TFAnnotator.xml Sun Jul  7 18:57:06 2013
@@ -0,0 +1,81 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+  
+    http://www.apache.org/licenses/LICENSE-2.0
+  
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License.
+-->
+<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
+  <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+  <primitive>true</primitive>
+  <annotatorImplementationName>org.spin.scrubber.uima.annotator.TFAnnotator</annotatorImplementationName>
+  <analysisEngineMetaData>
+    <name>TFAnnotator</name>
+    <description>adds 2 annotations of the term frequency (TF) of a given word. first with the same part of speech and second regardless of part of speech.</description>
+    <configurationParameters>
+      <configurationParameter>
+        <name>lookupQuery</name>
+        <description>query returns token, count, and pos for all of the filter (publication) set. used to build map for calculating TF</description>
+        <type>String</type>
+        <multiValued>false</multiValued>
+        <mandatory>true</mandatory>
+      </configurationParameter>
+    </configurationParameters>
+    <configurationParameterSettings>
+      <nameValuePair>
+        <name>lookupQuery</name>
+        <value>
+          <string>select cnt, token, pos from lookup_term_frequency;</string>
+        </value>
+      </nameValuePair>
+    </configurationParameterSettings>
+    <typeSystemDescription>
+      <imports>
+        <import location="../type/CalculationTypeSystem.xml"/>
+      </imports>
+    </typeSystemDescription>
+    <typePriorities/>
+    <fsIndexCollection>
+      <fsIndexes>
+        <fsIndexDescription>
+          <label>CalculationIndex</label>
+          <typeName>org.spin.scrubber.uima.type.Calculation</typeName>
+          <kind>sorted</kind>
+          <keys>
+            <fsIndexKey>
+              <featureName>begin</featureName>
+              <comparator>standard</comparator>
+            </fsIndexKey>
+          </keys>
+        </fsIndexDescription>
+      </fsIndexes>
+    </fsIndexCollection>
+    <capabilities>
+      <capability>
+        <inputs/>
+        <outputs>
+          <type allAnnotatorFeatures="true">org.spin.scrubber.uima.type.Calculation</type>
+        </outputs>
+        <languagesSupported/>
+      </capability>
+    </capabilities>
+    <operationalProperties>
+      <modifiesCas>true</modifiesCas>
+      <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
+      <outputsNewCASes>false</outputsNewCASes>
+    </operationalProperties>
+  </analysisEngineMetaData>
+  <resourceManagerConfiguration/>
+</analysisEngineDescription>

Propchange: ctakes/sandbox/ctakes-scrubber-deid/desc/ae/TFAnnotator.xml
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: ctakes/sandbox/ctakes-scrubber-deid/desc/consumer/consumer_jdbc_pubs.xml
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-scrubber-deid/desc/consumer/consumer_jdbc_pubs.xml?rev=1500502&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-scrubber-deid/desc/consumer/consumer_jdbc_pubs.xml (added)
+++ ctakes/sandbox/ctakes-scrubber-deid/desc/consumer/consumer_jdbc_pubs.xml Sun Jul  7 18:57:06 2013
@@ -0,0 +1,103 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+  
+    http://www.apache.org/licenses/LICENSE-2.0
+  
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License.
+-->
+<casConsumerDescription xmlns="http://uima.apache.org/resourceSpecifier">
+  <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+  <implementationName>org.spin.scrubber.uima.consumer.JDBCCasConsumer</implementationName>
+  <processingResourceMetaData>
+    <name>JDBC Cas Consumer</name>
+    <description>writes annotations to sql db</description>
+    <version>1.0</version>
+    <vendor>CBMI</vendor>
+    <configurationParameters>
+   	  <configurationParameter>
+        <name>tableName</name>
+        <description>name of table that will store the output annotations</description>
+        <type>String</type>
+        <multiValued>false</multiValued>
+        <mandatory>true</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+			<name>pos</name>
+			<description>
+				specify feature string for part-of-speech field. 
+				will differ depending on pos tagger impl. 
+			</description>
+			<type>String</type>
+			<multiValued>false</multiValued>
+			<mandatory>false</mandatory>
+		</configurationParameter>
+		<configurationParameter>
+			<name>capitalization</name>
+			<description>
+				specify feature string for capitalization field. 
+				values appear to range from 0-3 based on frequency of capital letters in the token 
+			</description>
+			<type>String</type>
+			<multiValued>false</multiValued>
+			<mandatory>false</mandatory>
+		</configurationParameter>
+    </configurationParameters>
+    <configurationParameterSettings>
+    	<nameValuePair>
+			<name>pos</name>
+			<value>
+				<string>edu.mayo.bmi.uima.core.type.BaseToken:partOfSpeech</string> <!-- cTakes POS tagger field -->
+				<!-- <string>org.apache.uima.TokenAnnotation:posTag</string> --> <!-- UIMA HMM POS tagger field -->
+			</value>
+		</nameValuePair>
+		<nameValuePair>
+			<name>capitalization</name>
+			<value>
+				<string>edu.mayo.bmi.uima.core.type.WordToken:capitalization</string> <!-- cTakes capitalization field -->
+			</value>
+		</nameValuePair>
+	  <nameValuePair>
+        <name>tableName</name>
+        <value>
+          <string>machine_annotations_pubs</string>
+        </value>
+      </nameValuePair>
+    </configurationParameterSettings>
+    <typeSystemDescription>
+      <imports>
+        <import name="org.apache.uima.examples.SourceDocumentInformation"/>
+        <import location="../type/KnownPHITypeSystem.xml"/>
+        <import location="../type/CalculationTypeSystem.xml"/>
+      </imports>
+    </typeSystemDescription>
+    <typePriorities/>
+    <fsIndexCollection/>
+    <capabilities>
+      <capability>
+        <inputs>
+          <type allAnnotatorFeatures="true">org.apache.uima.examples.SourceDocumentInformation</type>          
+        </inputs>
+        <outputs/>
+        <languagesSupported/>
+      </capability>
+    </capabilities>
+    <operationalProperties>
+      <modifiesCas>false</modifiesCas>
+      <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
+      <outputsNewCASes>false</outputsNewCASes>
+    </operationalProperties>
+  </processingResourceMetaData>
+  <resourceManagerConfiguration/>
+</casConsumerDescription>

Propchange: ctakes/sandbox/ctakes-scrubber-deid/desc/consumer/consumer_jdbc_pubs.xml
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: ctakes/sandbox/ctakes-scrubber-deid/desc/consumer/consumer_jdbc_test.xml
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-scrubber-deid/desc/consumer/consumer_jdbc_test.xml?rev=1500502&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-scrubber-deid/desc/consumer/consumer_jdbc_test.xml (added)
+++ ctakes/sandbox/ctakes-scrubber-deid/desc/consumer/consumer_jdbc_test.xml Sun Jul  7 18:57:06 2013
@@ -0,0 +1,104 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+  
+    http://www.apache.org/licenses/LICENSE-2.0
+  
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License.
+-->
+<casConsumerDescription xmlns="http://uima.apache.org/resourceSpecifier">
+  <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+  <implementationName>org.spin.scrubber.uima.consumer.JDBCCasConsumer</implementationName>
+  <processingResourceMetaData>
+    <name>JDBC Cas Consumer</name>
+    <description>writes annotations to sql db</description>
+    <version>1.0</version>
+    <vendor>CBMI</vendor>
+    <configurationParameters>
+   	  <configurationParameter>
+        <name>tableName</name>
+        <description>name of table that will store the output annotations</description>
+        <type>String</type>
+        <multiValued>false</multiValued>
+        <mandatory>true</mandatory>
+        </configurationParameter>
+        <configurationParameter>
+			<name>pos</name>
+			<description>
+				specify feature string for part-of-speech field. 
+				will differ depending on pos tagger impl. 
+			</description>
+			<type>String</type>
+			<multiValued>false</multiValued>
+			<mandatory>false</mandatory>
+		</configurationParameter>
+		<configurationParameter>
+			<name>capitalization</name>
+			<description>
+				specify feature string for capitalization field. 
+				values appear to range from 0-3 based on frequency of capital letters in the token 
+			</description>
+			<type>String</type>
+			<multiValued>false</multiValued>
+			<mandatory>false</mandatory>
+		</configurationParameter>
+    </configurationParameters>
+    <configurationParameterSettings>
+    	<nameValuePair>
+			<name>pos</name>
+			<value>
+				<string>edu.mayo.bmi.uima.core.type.BaseToken:partOfSpeech</string> <!-- cTakes POS tagger field -->
+				<!-- <string>org.apache.uima.TokenAnnotation:posTag</string> --> <!-- UIMA HMM POS tagger field -->
+			</value>
+		</nameValuePair>
+		<nameValuePair>
+			<name>capitalization</name>
+			<value>
+				<string>edu.mayo.bmi.uima.core.type.WordToken:capitalization</string> <!-- cTakes capitalization field -->
+			</value>
+		</nameValuePair>
+	  <nameValuePair>
+        <name>tableName</name>
+        <value>
+          <string>machine_annotations_test</string>
+        </value>
+      </nameValuePair>
+    </configurationParameterSettings>
+    <typeSystemDescription>
+      <imports>
+        <import name="org.apache.uima.examples.SourceDocumentInformation"/>
+        <import location="../type/KnownPHITypeSystem.xml"/>
+        <import location="../type/CalculationTypeSystem.xml"/>
+        <import location="../type/OntologyMatchTypeSystem.xml"/>
+      </imports>
+    </typeSystemDescription>
+    <typePriorities/>
+    <fsIndexCollection/>
+    <capabilities>
+      <capability>
+        <inputs>
+          <type allAnnotatorFeatures="true">org.apache.uima.examples.SourceDocumentInformation</type>
+        </inputs>
+        <outputs/>
+        <languagesSupported/>
+      </capability>
+    </capabilities>
+    <operationalProperties>
+      <modifiesCas>false</modifiesCas>
+      <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
+      <outputsNewCASes>false</outputsNewCASes>
+    </operationalProperties>
+  </processingResourceMetaData>
+  <resourceManagerConfiguration/>
+</casConsumerDescription>

Propchange: ctakes/sandbox/ctakes-scrubber-deid/desc/consumer/consumer_jdbc_test.xml
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: ctakes/sandbox/ctakes-scrubber-deid/desc/consumer/consumer_jdbc_train.xml
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-scrubber-deid/desc/consumer/consumer_jdbc_train.xml?rev=1500502&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-scrubber-deid/desc/consumer/consumer_jdbc_train.xml (added)
+++ ctakes/sandbox/ctakes-scrubber-deid/desc/consumer/consumer_jdbc_train.xml Sun Jul  7 18:57:06 2013
@@ -0,0 +1,104 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+  
+    http://www.apache.org/licenses/LICENSE-2.0
+  
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License.
+-->
+<casConsumerDescription xmlns="http://uima.apache.org/resourceSpecifier">
+  <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+  <implementationName>org.spin.scrubber.uima.consumer.JDBCCasConsumer</implementationName>
+  <processingResourceMetaData>
+    <name>JDBC Cas Consumer</name>
+    <description>writes annotations to sql db</description>
+    <version>1.0</version>
+    <vendor>CBMI</vendor>
+    <configurationParameters>
+   	  <configurationParameter>
+        <name>tableName</name>
+        <description>name of table that will store the output annotations</description>
+        <type>String</type>
+        <multiValued>false</multiValued>
+        <mandatory>true</mandatory>
+        </configurationParameter>
+        <configurationParameter>
+			<name>pos</name>
+			<description>
+				specify feature string for part-of-speech field. 
+				will differ depending on pos tagger impl. 
+			</description>
+			<type>String</type>
+			<multiValued>false</multiValued>
+			<mandatory>false</mandatory>
+		</configurationParameter>
+		<configurationParameter>
+			<name>capitalization</name>
+			<description>
+				specify feature string for capitalization field. 
+				values appear to range from 0-3 based on frequency of capital letters in the token 
+			</description>
+			<type>String</type>
+			<multiValued>false</multiValued>
+			<mandatory>false</mandatory>
+		</configurationParameter>
+    </configurationParameters>
+    <configurationParameterSettings>
+    	<nameValuePair>
+			<name>pos</name>
+			<value>
+				<string>edu.mayo.bmi.uima.core.type.BaseToken:partOfSpeech</string> <!-- cTakes POS tagger field -->
+				<!-- <string>org.apache.uima.TokenAnnotation:posTag</string> --> <!-- UIMA HMM POS tagger field -->
+			</value>
+		</nameValuePair>
+		<nameValuePair>
+			<name>capitalization</name>
+			<value>
+				<string>edu.mayo.bmi.uima.core.type.WordToken:capitalization</string> <!-- cTakes capitalization field -->
+			</value>
+		</nameValuePair>
+	  <nameValuePair>
+        <name>tableName</name>
+        <value>
+          <string>machine_annotations_train</string>
+        </value>
+      </nameValuePair>
+    </configurationParameterSettings>
+    <typeSystemDescription>
+      <imports>
+        <import name="org.apache.uima.examples.SourceDocumentInformation"/>
+        <import location="../type/KnownPHITypeSystem.xml"/>
+        <import location="../type/CalculationTypeSystem.xml"/>
+        <import location="../type/OntologyMatchTypeSystem.xml"/>
+      </imports>
+    </typeSystemDescription>
+    <typePriorities/>
+    <fsIndexCollection/>
+    <capabilities>
+      <capability>
+        <inputs>
+          <type allAnnotatorFeatures="true">org.apache.uima.examples.SourceDocumentInformation</type>
+        </inputs>
+        <outputs/>
+        <languagesSupported/>
+      </capability>
+    </capabilities>
+    <operationalProperties>
+      <modifiesCas>false</modifiesCas>
+      <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
+      <outputsNewCASes>false</outputsNewCASes>
+    </operationalProperties>
+  </processingResourceMetaData>
+  <resourceManagerConfiguration/>
+</casConsumerDescription>

Propchange: ctakes/sandbox/ctakes-scrubber-deid/desc/consumer/consumer_jdbc_train.xml
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: ctakes/sandbox/ctakes-scrubber-deid/desc/consumer/consumer_printer_debug.xml
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-scrubber-deid/desc/consumer/consumer_printer_debug.xml?rev=1500502&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-scrubber-deid/desc/consumer/consumer_printer_debug.xml (added)
+++ ctakes/sandbox/ctakes-scrubber-deid/desc/consumer/consumer_printer_debug.xml Sun Jul  7 18:57:06 2013
@@ -0,0 +1,95 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+  
+    http://www.apache.org/licenses/LICENSE-2.0
+  
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License.
+-->
+<casConsumerDescription xmlns="http://uima.apache.org/resourceSpecifier"> <!-- CAS consumer descriptor for the debug Annotation Printer -->
+  <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+  <implementationName>org.spin.scrubber.uima.consumer.AnnotationPrinter</implementationName> <!-- class implementing this consumer -->
+  <processingResourceMetaData>
+    <name>Annotation Printer</name>
+    <description>Prints annotations.</description>
+    <version>1.0</version>
+    <vendor>The Apache Software Foundation</vendor>
+    <configurationParameters> <!-- declarations: all three parameters are optional, single-valued Strings -->
+      <configurationParameter>
+        <name>outputFile</name>
+        <description>File to which annotations will be printed</description>
+        <type>String</type>
+        <multiValued>false</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>pos</name>
+        <description>specify feature string for part-of-speech field. 
+					will differ depending on pos tagger impl.</description>
+        <type>String</type>
+        <multiValued>false</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>ontologyConceptArr</name>
+        <description>this will be an array containing concept IDs that have been associated to this token</description>
+        <type>String</type> <!-- NOTE(review): declared as one String and set below to a Type:feature string, not an array; confirm the description wording -->
+        <multiValued>false</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+    </configurationParameters>
+    <configurationParameterSettings> <!-- values assigned to the parameters declared above -->
+      <nameValuePair>
+        <name>outputFile</name>
+        <value>
+          <string>out/annotations.txt</string> <!-- relative path; presumably resolved against the working directory (confirm in AnnotationPrinter) -->
+        </value>
+      </nameValuePair>
+      <nameValuePair>
+        <name>pos</name>
+        <value>
+          <string>edu.mayo.bmi.uima.core.type.BaseToken:partOfSpeech</string> <!-- Type:feature string for the part-of-speech field -->
+        </value>
+      </nameValuePair>
+      <nameValuePair>
+        <name>ontologyConceptArr</name>
+        <value>
+          <string>edu.mayo.bmi.uima.core.type.NamedEntity:ontologyConceptArr</string> <!-- Type:feature string for the ontology concept field -->
+        </value>
+      </nameValuePair>
+    </configurationParameterSettings>
+    <typeSystemDescription>
+      <imports>
+          <import name="org.apache.uima.examples.SourceDocumentInformation"/> <!-- imported by name; the three imports below are by relative location -->
+          <import location="../type/KnownPHITypeSystem.xml"/>
+          <import location="../type/CalculationTypeSystem.xml"/>
+          <import location="../type/OntologyMatchTypeSystem.xml"/>
+      </imports>
+    </typeSystemDescription>
+    <capabilities>
+      <capability>
+        <inputs>
+          <type allAnnotatorFeatures="true">org.apache.uima.examples.SourceDocumentInformation</type> <!-- the only declared input type -->
+        </inputs>
+        <outputs/>
+        <languagesSupported/>
+      </capability>
+    </capabilities>
+    <operationalProperties> <!-- declared read-only: does not modify the CAS and emits no new CASes -->
+      <modifiesCas>false</modifiesCas>
+      <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
+      <outputsNewCASes>false</outputsNewCASes>
+    </operationalProperties>
+  </processingResourceMetaData>
+</casConsumerDescription>

Propchange: ctakes/sandbox/ctakes-scrubber-deid/desc/consumer/consumer_printer_debug.xml
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: ctakes/sandbox/ctakes-scrubber-deid/desc/cpe/cpe_PUBS.xml
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-scrubber-deid/desc/cpe/cpe_PUBS.xml?rev=1500502&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-scrubber-deid/desc/cpe/cpe_PUBS.xml (added)
+++ ctakes/sandbox/ctakes-scrubber-deid/desc/cpe/cpe_PUBS.xml Sun Jul  7 18:57:06 2013
@@ -0,0 +1,108 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+  
+    http://www.apache.org/licenses/LICENSE-2.0
+  
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License.
+-->
+<cpeDescription xmlns="http://uima.apache.org/resourceSpecifier">
+<!-- CPE for the publications (PUBS) corpus: file reader, cTAKES chunker, dictionary annotator, JDBC consumer. -->
+<!-- READERS: imports the reader descriptor under its committed name, reader_files_pubs.xml -->
+    <collectionReader>
+        <collectionIterator>
+            <descriptor>
+                <import location="../reader/reader_files_pubs.xml"/>
+            </descriptor>
+        </collectionIterator>
+    </collectionReader>
+
+    <!-- ANNOTATORS (the consumers follow in the same casProcessors list) -->
+    <casProcessors casPoolSize="12" processingUnitThreadCount="9">
+        <!-- cTAKES chunker aggregate, imported from the sibling ctakes-chunker module -->
+        <casProcessor deployment="integrated" name="ChunkerAggregate">
+            <descriptor>
+                <import location="../../../ctakes/ctakes-chunker/desc/analysis_engine/ChunkerAggregate.xml"/>
+            </descriptor>
+            <deploymentParameters/>
+            <errorHandling>
+                <errorRateThreshold action="terminate" value="10/1000"/>
+                <maxConsecutiveRestarts action="terminate" value="30"/>
+                <timeout max="100000" default="-1"/>
+            </errorHandling>
+            <checkpoint batch="10000" time="1000ms"/>
+        </casProcessor>
+        <!-- scrubber dictionary annotator (../ae/DictionaryAnnotator.xml) -->
+        <casProcessor deployment="integrated" name="DictionaryAnnotator">
+            <descriptor>
+                <import location="../ae/DictionaryAnnotator.xml"/>
+            </descriptor>
+            <deploymentParameters/>
+            <errorHandling>
+                <errorRateThreshold action="terminate" value="10/1000"/>
+                <maxConsecutiveRestarts action="terminate" value="30"/>
+                <timeout max="100000" default="-1"/>
+            </errorHandling>
+            <checkpoint batch="10000" time="1000ms"/>
+        </casProcessor>
+
+        <!-- NOTE: regex annotations are not needed for publications and take a long time to process, so RegexAnnotator stays disabled -->
+        <!-- <casProcessor deployment="integrated" name="RegexAnnotator">
+            <descriptor>
+                <import location="../ae/RegexAnnotator.xml"/>
+            </descriptor>
+            <deploymentParameters/>
+            <errorHandling>
+                <errorRateThreshold action="terminate" value="10/1000"/>
+                <maxConsecutiveRestarts action="terminate" value="30"/>
+                <timeout max="100000" default="-1"/>
+            </errorHandling>
+            <checkpoint batch="10000" time="1000ms"/>
+        </casProcessor> -->
+
+        <!-- CONSUMERS: imports the JDBC consumer under its committed name, consumer_jdbc_pubs.xml -->
+        <casProcessor deployment="integrated" name="JDBC Cas Consumer">
+            <descriptor>
+                <import location="../consumer/consumer_jdbc_pubs.xml"/>
+            </descriptor>
+            <deploymentParameters/>
+            <errorHandling>
+                <errorRateThreshold action="terminate" value="10/1000"/>
+                <maxConsecutiveRestarts action="terminate" value="30"/>
+                <timeout max="100000" default="-1"/>
+            </errorHandling>
+            <checkpoint batch="10000" time="1000ms"/>
+        </casProcessor>
+
+        <!-- <casProcessor deployment="integrated" name="Annotation Printer">
+            <descriptor>
+                <import location="../consumer/consumer_printer_debug.xml"/>
+            </descriptor>
+            <deploymentParameters/>
+            <errorHandling>
+                <errorRateThreshold action="terminate" value="0/1000"/>
+                <maxConsecutiveRestarts action="terminate" value="30"/>
+                <timeout max="100000" default="-1"/>
+            </errorHandling>
+            <checkpoint batch="10000" time="1000ms"/>
+        </casProcessor> -->
+    </casProcessors>
+
+    <cpeConfig>
+        <numToProcess>-1</numToProcess>
+        <deployAs>immediate</deployAs>
+        <checkpoint batch="0" time="300000ms"/>
+        <timerImpl/>
+    </cpeConfig>
+</cpeDescription>

Propchange: ctakes/sandbox/ctakes-scrubber-deid/desc/cpe/cpe_PUBS.xml
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: ctakes/sandbox/ctakes-scrubber-deid/desc/cpe/cpe_cases_test.xml
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-scrubber-deid/desc/cpe/cpe_cases_test.xml?rev=1500502&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-scrubber-deid/desc/cpe/cpe_cases_test.xml (added)
+++ ctakes/sandbox/ctakes-scrubber-deid/desc/cpe/cpe_cases_test.xml Sun Jul  7 18:57:06 2013
@@ -0,0 +1,116 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+  
+    http://www.apache.org/licenses/LICENSE-2.0
+  
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License.
+-->
+<cpeDescription xmlns="http://uima.apache.org/resourceSpecifier">
+
+<!-- READERS: documents are supplied by the reader descriptor imported below -->
+    <collectionReader>
+        <collectionIterator>
+            <descriptor>
+                <import location="../reader/reader_files_test.xml"/>
+            </descriptor>
+        </collectionIterator>
+    </collectionReader>
+    
+     <casProcessors casPoolSize="6" processingUnitThreadCount="3">
+    <!-- ANNOTATORS. NOTE(review): cpe_cases_train.xml additionally runs ContextDependentTokenizerAnnotator and places RegexAnnotator before DictionaryAnnotator; confirm this pipeline is meant to differ. -->
+        <casProcessor deployment="integrated" name="ChunkerAggregate">
+            <descriptor>
+                <import location="../../../ctakes/ctakes-chunker/desc/analysis_engine/ChunkerAggregate.xml"/>
+            </descriptor>
+            <deploymentParameters/>
+            <errorHandling>
+                <errorRateThreshold action="terminate" value="0/1000"/>
+                <maxConsecutiveRestarts action="terminate" value="30"/>
+                <timeout max="100000" default="-1"/>
+            </errorHandling>
+            <checkpoint batch="10000" time="1000ms"/>
+        </casProcessor>
+        <casProcessor deployment="integrated" name="DictionaryAnnotator">
+            <descriptor>
+                <import location="../ae/DictionaryAnnotator.xml"/>
+            </descriptor>
+            <deploymentParameters/>
+            <errorHandling>
+                <errorRateThreshold action="terminate" value="0/1000"/>
+                <maxConsecutiveRestarts action="terminate" value="30"/>
+                <timeout max="100000" default="-1"/>
+            </errorHandling>
+            <checkpoint batch="10000" time="1000ms"/>
+        </casProcessor>
+        <casProcessor deployment="integrated" name="RegexAnnotator">
+            <descriptor>
+                <import location="../ae/RegexAnnotator.xml"/>
+            </descriptor>
+            <deploymentParameters/>
+            <errorHandling>
+                <errorRateThreshold action="terminate" value="0/1000"/>
+                <maxConsecutiveRestarts action="terminate" value="30"/>
+                <timeout max="100000" default="-1"/>
+            </errorHandling>
+            <checkpoint batch="10000" time="1000ms"/>
+        </casProcessor>
+        <casProcessor deployment="integrated" name="TFAnnotator">
+            <descriptor>
+                <import location="../ae/TFAnnotator.xml"/>
+            </descriptor>
+            <deploymentParameters/>
+            <errorHandling>
+                <errorRateThreshold action="terminate" value="0/1000"/>
+                <maxConsecutiveRestarts action="terminate" value="30"/>
+                <timeout max="100000" default="-1"/>
+            </errorHandling>
+            <checkpoint batch="10000" time="1000ms"/>
+        </casProcessor>
+ 
+	<!-- CONSUMERS: the JDBC consumer is active; the Annotation Printer after it is commented out -->        
+        <casProcessor deployment="integrated" name="JDBC Cas Consumer">
+            <descriptor>
+                <import location="../consumer/consumer_jdbc_test.xml"/>
+            </descriptor>
+            <deploymentParameters/>
+            <errorHandling>
+                <errorRateThreshold action="terminate" value="0/1000"/>
+                <maxConsecutiveRestarts action="terminate" value="30"/>
+                <timeout max="100000" default="-1"/>
+            </errorHandling>
+            <checkpoint batch="10000" time="1000ms"/>
+        </casProcessor>
+        
+        <!-- <casProcessor deployment="integrated" name="Annotation Printer">
+            <descriptor>
+                <import location="../consumer/consumer_printer_debug.xml"/>
+            </descriptor>
+            <deploymentParameters/>
+            <errorHandling>
+                <errorRateThreshold action="terminate" value="0/1000"/>
+                <maxConsecutiveRestarts action="terminate" value="30"/>
+                <timeout max="100000" default="-1"/>
+            </errorHandling>
+            <checkpoint batch="10000" time="1000ms"/>
+        </casProcessor> -->
+    </casProcessors>
+    
+    <cpeConfig>
+        <numToProcess>-1</numToProcess>
+        <deployAs>immediate</deployAs>
+        <checkpoint batch="0" time="300000ms"/>
+        <timerImpl/>
+    </cpeConfig>
+</cpeDescription>

Propchange: ctakes/sandbox/ctakes-scrubber-deid/desc/cpe/cpe_cases_test.xml
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: ctakes/sandbox/ctakes-scrubber-deid/desc/cpe/cpe_cases_train.xml
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-scrubber-deid/desc/cpe/cpe_cases_train.xml?rev=1500502&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-scrubber-deid/desc/cpe/cpe_cases_train.xml (added)
+++ ctakes/sandbox/ctakes-scrubber-deid/desc/cpe/cpe_cases_train.xml Sun Jul  7 18:57:06 2013
@@ -0,0 +1,139 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+  
+    http://www.apache.org/licenses/LICENSE-2.0
+  
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License.
+-->
+<cpeDescription xmlns="http://uima.apache.org/resourceSpecifier">
+
+    <!-- Collection reader: supplies the documents processed by this CPE -->
+    <collectionReader>
+        <collectionIterator>
+            <descriptor>
+                <import location="../reader/reader_files_train.xml"/>
+            </descriptor>
+        </collectionIterator>
+    </collectionReader>
+
+    <casProcessors casPoolSize="6" processingUnitThreadCount="3">
+        <!-- Chunker aggregate, imported from the sibling ctakes-chunker module -->
+        <casProcessor deployment="integrated" name="ChunkerAggregate">
+            <descriptor>
+                <import location="../../../ctakes/ctakes-chunker/desc/analysis_engine/ChunkerAggregate.xml"/>
+            </descriptor>
+            <deploymentParameters/>
+            <errorHandling>
+                <errorRateThreshold action="terminate" value="0/1000"/>
+                <maxConsecutiveRestarts action="terminate" value="30"/>
+                <timeout max="100000" default="-1"/>
+            </errorHandling>
+            <checkpoint batch="10000" time="1000ms"/>
+        </casProcessor>
+
+        <!-- Context-dependent tokenizer, imported from the sibling ctakes-context-tokenizer module -->
+        <casProcessor deployment="integrated" name="ContextDependentTokenizerAnnotator">
+            <descriptor>
+                <import location="../../../ctakes/ctakes-context-tokenizer/desc/analysis_engine/ContextDependentTokenizerAnnotator.xml"/>
+            </descriptor>
+            <deploymentParameters/>
+            <errorHandling>
+                <errorRateThreshold action="terminate" value="0/1000"/>
+                <maxConsecutiveRestarts action="terminate" value="30"/>
+                <timeout max="100000" default="-1"/>
+            </errorHandling>
+            <checkpoint batch="10000" time="1000ms"/>
+        </casProcessor>
+
+        <!-- Regex annotator -->
+        <casProcessor deployment="integrated" name="RegexAnnotator">
+            <descriptor>
+                <import location="../ae/RegexAnnotator.xml"/>
+            </descriptor>
+            <deploymentParameters/>
+            <errorHandling>
+                <errorRateThreshold action="terminate" value="0/1000"/>
+                <maxConsecutiveRestarts action="terminate" value="30"/>
+                <timeout max="100000" default="-1"/>
+            </errorHandling>
+            <checkpoint batch="10000" time="1000ms"/>
+        </casProcessor>
+
+        <!-- Dictionary annotator -->
+        <casProcessor deployment="integrated" name="DictionaryAnnotator">
+            <descriptor>
+                <import location="../ae/DictionaryAnnotator.xml"/>
+            </descriptor>
+            <deploymentParameters/>
+            <errorHandling>
+                <errorRateThreshold action="terminate" value="0/1000"/>
+                <maxConsecutiveRestarts action="terminate" value="30"/>
+                <timeout max="100000" default="-1"/>
+            </errorHandling>
+            <checkpoint batch="10000" time="1000ms"/>
+        </casProcessor>
+
+        <!-- TF calculation annotator -->
+        <casProcessor deployment="integrated" name="TFAnnotator">
+            <descriptor>
+                <import location="../ae/TFAnnotator.xml"/>
+            </descriptor>
+            <deploymentParameters/>
+            <errorHandling>
+                <errorRateThreshold action="terminate" value="0/1000"/>
+                <maxConsecutiveRestarts action="terminate" value="30"/>
+                <timeout max="100000" default="-1"/>
+            </errorHandling>
+            <checkpoint batch="10000" time="1000ms"/>
+        </casProcessor>
+
+        <!-- Debug annotation printer: the only consumer currently enabled in this CPE -->
+        <casProcessor deployment="integrated" name="Annotation Printer">
+            <descriptor>
+                <import location="../consumer/consumer_printer_debug.xml"/>
+            </descriptor>
+            <deploymentParameters/>
+            <errorHandling>
+                <errorRateThreshold action="terminate" value="0/1000"/>
+                <maxConsecutiveRestarts action="terminate" value="30"/>
+                <timeout max="100000" default="-1"/>
+            </errorHandling>
+            <checkpoint batch="10000" time="1000ms"/>
+        </casProcessor>
+
+        <!-- JDBC consumer (currently disabled)
+        <casProcessor deployment="integrated" name="JDBC Cas Consumer">
+            <descriptor>
+                <import location="../consumer/consumer_jdbc_train.xml"/>
+            </descriptor>
+            <deploymentParameters/>
+            <errorHandling>
+                <errorRateThreshold action="terminate" value="0/1000"/>
+                <maxConsecutiveRestarts action="terminate" value="30"/>
+                <timeout max="100000" default="-1"/>
+            </errorHandling>
+            <checkpoint batch="10000" time="1000ms"/>
+        </casProcessor>
+        -->
+
+    </casProcessors>
+
+    <cpeConfig>
+        <numToProcess>-1</numToProcess>
+        <deployAs>immediate</deployAs>
+        <checkpoint batch="0" time="300000ms"/>
+        <timerImpl/>
+    </cpeConfig>
+</cpeDescription>

Propchange: ctakes/sandbox/ctakes-scrubber-deid/desc/cpe/cpe_cases_train.xml
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: ctakes/sandbox/ctakes-scrubber-deid/desc/reader/reader_files_pubs.xml
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-scrubber-deid/desc/reader/reader_files_pubs.xml?rev=1500502&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-scrubber-deid/desc/reader/reader_files_pubs.xml (added)
+++ ctakes/sandbox/ctakes-scrubber-deid/desc/reader/reader_files_pubs.xml Sun Jul  7 18:57:06 2013
@@ -0,0 +1,125 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+  
+    http://www.apache.org/licenses/LICENSE-2.0
+  
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License.
+-->
+<collectionReaderDescription xmlns="http://uima.apache.org/resourceSpecifier">
+  <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+  <!-- Collection reader for the publications (pubs) corpus; reads files from the InputDirectory set below. -->
+  <!-- TODO: use scrubber.properties (presumably for the hard-coded InputDirectory below; confirm) -->
+  <implementationName>org.spin.scrubber.uima.reader.FileSystemCollectionReader</implementationName>
+
+  <processingResourceMetaData>
+    <name>File System Collection Reader</name>
+    <description>Reads files from the filesystem.  This CollectionReader may be used
+          with or without a CAS Initializer.  If a CAS Initializer is supplied, it will
+          be passed an InputStream to the file and must populate the CAS from that
+          InputStream.  If no CAS Initializer is supplied, this CollectionReader will
+          read the file itself and treat the entire contents of the file as the
+          document to be inserted into the CAS.</description>
+    <version>1.0</version>
+    <vendor>The Apache Software Foundation</vendor>
+    <configurationParameters> <!-- declarations: InputDirectory is the only mandatory parameter -->
+      <configurationParameter>
+        <name>InputDirectory</name>
+        <description>Directory containing input files</description>
+        <type>String</type>
+        <multiValued>false</multiValued>
+        <mandatory>true</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>Encoding</name>
+        <description>Character encoding for the documents.  If not specified,
+                   the default system encoding will be used.  Note that this parameter
+                   only applies if there is no CAS Initializer provided; otherwise,
+                   it is the CAS Initializer's responsibility to deal with character
+                   encoding issues.</description>
+        <type>String</type>
+        <multiValued>false</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>Language</name>
+        <description>ISO language code for the documents</description>
+        <type>String</type>
+        <multiValued>false</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>BrowseSubdirectories</name>
+        <description>True means include files of subdirectories, recursively, of the input directory.</description>
+        <type>Boolean</type>
+        <multiValued>false</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+    </configurationParameters>
+    <configurationParameterSettings> <!-- Encoding and Language are left unset -->
+      <nameValuePair>
+        <name>InputDirectory</name>
+        <value>
+          <string>data/input/pubs/processed</string> <!-- relative path; TODO(review): confirm which directory it is resolved against -->
+        </value>
+      </nameValuePair>
+      <nameValuePair>
+        <name>BrowseSubdirectories</name>
+        <value>
+          <boolean>false</boolean> <!-- subdirectories of InputDirectory are not included -->
+        </value>
+      </nameValuePair>
+    </configurationParameterSettings>
+    <typeSystemDescription>
+      <imports>
+        <import name="org.apache.uima.examples.SourceDocumentInformation"/>
+        <!-- TODO: rename KnownPHI to Human Annotation -->
+        <import location="../type/KnownPHITypeSystem.xml"/>
+      </imports>
+    </typeSystemDescription>
+    <typePriorities/>
+    <fsIndexCollection>
+      <fsIndexes>
+        <fsIndexDescription> <!-- sorted index over KnownPHI, keyed on the begin feature -->
+          <label>KnownPHIIndex</label>
+          <typeName>org.spin.scrubber.uima.type.KnownPHI</typeName>
+          <kind>sorted</kind>
+          <keys>
+            <fsIndexKey>
+              <featureName>begin</featureName>
+              <comparator>standard</comparator>
+            </fsIndexKey>
+          </keys>
+        </fsIndexDescription>
+      </fsIndexes>
+    </fsIndexCollection>
+    <capabilities>
+      <capability>
+        <inputs/>
+        <outputs>
+          <type allAnnotatorFeatures="true">org.apache.uima.examples.SourceDocumentInformation</type>
+          <!-- TODO: rename KnownPHI to Human Annotation -->
+          <type allAnnotatorFeatures="true">org.spin.scrubber.uima.type.KnownPHI</type>
+        </outputs>
+        <languagesSupported/>
+      </capability>
+    </capabilities>
+    <operationalProperties>
+      <modifiesCas>true</modifiesCas>
+      <multipleDeploymentAllowed>false</multipleDeploymentAllowed>
+      <outputsNewCASes>true</outputsNewCASes>
+    </operationalProperties>
+  </processingResourceMetaData>
+  <resourceManagerConfiguration/>
+</collectionReaderDescription>

Propchange: ctakes/sandbox/ctakes-scrubber-deid/desc/reader/reader_files_pubs.xml
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: ctakes/sandbox/ctakes-scrubber-deid/desc/reader/reader_files_test.xml
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-scrubber-deid/desc/reader/reader_files_test.xml?rev=1500502&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-scrubber-deid/desc/reader/reader_files_test.xml (added)
+++ ctakes/sandbox/ctakes-scrubber-deid/desc/reader/reader_files_test.xml Sun Jul  7 18:57:06 2013
@@ -0,0 +1,158 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+  
+    http://www.apache.org/licenses/LICENSE-2.0
+  
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License.
+-->
+<collectionReaderDescription xmlns="http://uima.apache.org/resourceSpecifier"> <!-- Collection reader descriptor for the "test" case set; input comes from data/input/cases/test (see the InputDirectory setting). -->
+  <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+  <implementationName>org.spin.scrubber.uima.reader.FileSystemCollectionReaderXML</implementationName>
+  <processingResourceMetaData>
+    <name>File System Collection Reader</name>
+    <description>Reads files from the filesystem.  This CollectionReader may be used
+          with or without a CAS Initializer.  If a CAS Initializer is supplied, it will
+          be passed an InputStream to the file and must populate the CAS from that
+          InputStream.  If no CAS Initializer is supplied, this CollectionReader will
+          read the file itself and treat the entire contents of the file as the
+          document to be inserted into the CAS.</description>
+    <version>1.0</version>
+    <vendor>The Apache Software Foundation</vendor>
+    <configurationParameters> <!-- Parameter declarations only; values are assigned in configurationParameterSettings. Encoding and Language are declared but given no value in this descriptor. -->
+      <configurationParameter>
+        <name>KnownPHINodeList</name>
+        <description>List of XPaths to specific fields known to contain ONLY PHI.</description>
+        <type>String</type>
+        <multiValued>true</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>ScrubNodeList</name>
+        <description>List of XPaths to scrub</description>
+        <type>String</type>
+        <multiValued>true</multiValued>
+        <mandatory>true</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>InputDirectory</name>
+        <description>Directory containing input files</description>
+        <type>String</type>
+        <multiValued>false</multiValued>
+        <mandatory>true</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>Encoding</name>
+        <description>Character encoding for the documents.  If not specified,
+                   the default system encoding will be used.  Note that this parameter
+                   only applies if there is no CAS Initializer provided; otherwise,
+                   it is the CAS Initializer's responsibility to deal with character
+                   encoding issues.</description>
+        <type>String</type>
+        <multiValued>false</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>Language</name>
+        <description>ISO language code for the documents</description>
+        <type>String</type>
+        <multiValued>false</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>BrowseSubdirectories</name>
+        <description>True means include files of subdirectories, recursively, of the input directory.</description>
+        <type>Boolean</type>
+        <multiValued>false</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+    </configurationParameters>
+    <configurationParameterSettings>
+      <nameValuePair> <!-- XPaths of the pathology report text nodes to scrub. -->
+        <name>ScrubNodeList</name>
+        <value>
+          <array>
+            <string>/Envelope/Body/PathologyCase/FullReportData</string>
+            <string>/Envelope/Body/PathologyCase/FullReportText</string>
+            <string>/Envelope/Body/PathologyCase/GrossDescriptionText</string>
+            <string>/Envelope/Body/PathologyCase/DiagnosisText</string>
+          </array>
+        </value>
+      </nameValuePair>
+      <nameValuePair> <!-- XPaths of header identifier fields declared as containing only PHI. -->
+        <name>KnownPHINodeList</name>
+        <value>
+          <array>
+            <string>/Envelope/Header/Identifiers/FirstName</string>
+            <string>/Envelope/Header/Identifiers/LastName</string>
+            <string>/Envelope/Header/Identifiers/DateOfBirth</string>
+            <string>/Envelope/Header/Identifiers/SSN</string>
+            <string>/Envelope/Header/Identifiers/AccessionNumber</string>
+            <string>/Envelope/Header/Identifiers/LocalMRN</string>
+          </array>
+        </value>
+      </nameValuePair>
+      <nameValuePair> <!-- Relative path; presumably resolved against the process working directory. TODO confirm. -->
+        <name>InputDirectory</name>
+        <value>
+          <string>data/input/cases/test</string>
+        </value>
+      </nameValuePair>
+      <nameValuePair>
+        <name>BrowseSubdirectories</name>
+        <value>
+          <boolean>false</boolean>
+        </value>
+      </nameValuePair>
+    </configurationParameterSettings>
+    <typeSystemDescription>
+      <imports> <!-- First import is by name; second is by a location relative to this descriptor (desc/reader/ to desc/type/). -->
+        <import name="org.apache.uima.examples.SourceDocumentInformation"/>
+        <import location="../type/KnownPHITypeSystem.xml"/>
+      </imports>
+    </typeSystemDescription>
+    <typePriorities/>
+    <fsIndexCollection>
+      <fsIndexes>
+        <fsIndexDescription> <!-- Sorted index over KnownPHI annotations, keyed on the begin feature. -->
+          <label>KnownPHIIndex</label>
+          <typeName>org.spin.scrubber.uima.type.KnownPHI</typeName>
+          <kind>sorted</kind>
+          <keys>
+            <fsIndexKey>
+              <featureName>begin</featureName>
+              <comparator>standard</comparator>
+            </fsIndexKey>
+          </keys>
+        </fsIndexDescription>
+      </fsIndexes>
+    </fsIndexCollection>
+    <capabilities>
+      <capability>
+        <inputs/>
+        <outputs>
+          <type allAnnotatorFeatures="true">org.apache.uima.examples.SourceDocumentInformation</type>
+          <type allAnnotatorFeatures="true">org.spin.scrubber.uima.type.KnownPHI</type>
+        </outputs>
+        <languagesSupported/>
+      </capability>
+    </capabilities>
+    <operationalProperties>
+      <modifiesCas>true</modifiesCas>
+      <multipleDeploymentAllowed>false</multipleDeploymentAllowed>
+      <outputsNewCASes>true</outputsNewCASes>
+    </operationalProperties>
+  </processingResourceMetaData>
+  <resourceManagerConfiguration/>
+</collectionReaderDescription>

Propchange: ctakes/sandbox/ctakes-scrubber-deid/desc/reader/reader_files_test.xml
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: ctakes/sandbox/ctakes-scrubber-deid/desc/reader/reader_files_train.xml
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-scrubber-deid/desc/reader/reader_files_train.xml?rev=1500502&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-scrubber-deid/desc/reader/reader_files_train.xml (added)
+++ ctakes/sandbox/ctakes-scrubber-deid/desc/reader/reader_files_train.xml Sun Jul  7 18:57:06 2013
@@ -0,0 +1,158 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+  
+    http://www.apache.org/licenses/LICENSE-2.0
+  
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License.
+-->
+<collectionReaderDescription xmlns="http://uima.apache.org/resourceSpecifier"> <!-- Collection reader descriptor for the "train" case set; input comes from data/input/cases/train (see the InputDirectory setting). -->
+  <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+  <implementationName>org.spin.scrubber.uima.reader.FileSystemCollectionReaderXML</implementationName>
+  <processingResourceMetaData>
+    <name>File System Collection Reader</name>
+    <description>Reads files from the filesystem.  This CollectionReader may be used
+          with or without a CAS Initializer.  If a CAS Initializer is supplied, it will
+          be passed an InputStream to the file and must populate the CAS from that
+          InputStream.  If no CAS Initializer is supplied, this CollectionReader will
+          read the file itself and treat the entire contents of the file as the
+          document to be inserted into the CAS.</description>
+    <version>1.0</version>
+    <vendor>The Apache Software Foundation</vendor>
+    <configurationParameters> <!-- Parameter declarations only; values are assigned in configurationParameterSettings. Encoding and Language are declared but given no value in this descriptor. -->
+      <configurationParameter>
+        <name>KnownPHINodeList</name>
+        <description>List of XPaths to specific fields known to contain ONLY PHI.</description>
+        <type>String</type>
+        <multiValued>true</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>ScrubNodeList</name>
+        <description>List of XPaths to scrub</description>
+        <type>String</type>
+        <multiValued>true</multiValued>
+        <mandatory>true</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>InputDirectory</name>
+        <description>Directory containing input files</description>
+        <type>String</type>
+        <multiValued>false</multiValued>
+        <mandatory>true</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>Encoding</name>
+        <description>Character encoding for the documents.  If not specified,
+                   the default system encoding will be used.  Note that this parameter
+                   only applies if there is no CAS Initializer provided; otherwise,
+                   it is the CAS Initializer's responsibility to deal with character
+                   encoding issues.</description>
+        <type>String</type>
+        <multiValued>false</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>Language</name>
+        <description>ISO language code for the documents</description>
+        <type>String</type>
+        <multiValued>false</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>BrowseSubdirectories</name>
+        <description>True means include files of subdirectories, recursively, of the input directory.</description>
+        <type>Boolean</type>
+        <multiValued>false</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+    </configurationParameters>
+    <configurationParameterSettings>
+      <nameValuePair> <!-- XPaths of the pathology report text nodes to scrub. -->
+        <name>ScrubNodeList</name>
+        <value>
+          <array>
+            <string>/Envelope/Body/PathologyCase/FullReportData</string>
+            <string>/Envelope/Body/PathologyCase/FullReportText</string>
+            <string>/Envelope/Body/PathologyCase/GrossDescriptionText</string>
+            <string>/Envelope/Body/PathologyCase/DiagnosisText</string>
+          </array>
+        </value>
+      </nameValuePair>
+      <nameValuePair> <!-- XPaths of header identifier fields declared as containing only PHI. -->
+        <name>KnownPHINodeList</name>
+        <value>
+          <array>
+            <string>/Envelope/Header/Identifiers/FirstName</string>
+            <string>/Envelope/Header/Identifiers/LastName</string>
+            <string>/Envelope/Header/Identifiers/DateOfBirth</string>
+            <string>/Envelope/Header/Identifiers/SSN</string>
+            <string>/Envelope/Header/Identifiers/AccessionNumber</string>
+            <string>/Envelope/Header/Identifiers/LocalMRN</string>
+          </array>
+        </value>
+      </nameValuePair>
+      <nameValuePair> <!-- Relative path; presumably resolved against the process working directory. TODO confirm. -->
+        <name>InputDirectory</name>
+        <value>
+          <string>data/input/cases/train</string>
+        </value>
+      </nameValuePair>
+      <nameValuePair>
+        <name>BrowseSubdirectories</name>
+        <value>
+          <boolean>false</boolean>
+        </value>
+      </nameValuePair>
+    </configurationParameterSettings>
+    <typeSystemDescription>
+      <imports> <!-- First import is by name; second is by a location relative to this descriptor (desc/reader/ to desc/type/). -->
+        <import name="org.apache.uima.examples.SourceDocumentInformation"/>
+        <import location="../type/KnownPHITypeSystem.xml"/>
+      </imports>
+    </typeSystemDescription>
+    <typePriorities/>
+    <fsIndexCollection>
+      <fsIndexes>
+        <fsIndexDescription> <!-- Sorted index over KnownPHI annotations, keyed on the begin feature. -->
+          <label>KnownPHIIndex</label>
+          <typeName>org.spin.scrubber.uima.type.KnownPHI</typeName>
+          <kind>sorted</kind>
+          <keys>
+            <fsIndexKey>
+              <featureName>begin</featureName>
+              <comparator>standard</comparator>
+            </fsIndexKey>
+          </keys>
+        </fsIndexDescription>
+      </fsIndexes>
+    </fsIndexCollection>
+    <capabilities>
+      <capability>
+        <inputs/>
+        <outputs>
+          <type allAnnotatorFeatures="true">org.apache.uima.examples.SourceDocumentInformation</type>
+          <type allAnnotatorFeatures="true">org.spin.scrubber.uima.type.KnownPHI</type>
+        </outputs>
+        <languagesSupported/>
+      </capability>
+    </capabilities>
+    <operationalProperties>
+      <modifiesCas>true</modifiesCas>
+      <multipleDeploymentAllowed>false</multipleDeploymentAllowed>
+      <outputsNewCASes>true</outputsNewCASes>
+    </operationalProperties>
+  </processingResourceMetaData>
+  <resourceManagerConfiguration/>
+</collectionReaderDescription>

Propchange: ctakes/sandbox/ctakes-scrubber-deid/desc/reader/reader_files_train.xml
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: ctakes/sandbox/ctakes-scrubber-deid/desc/type/CalculationTypeSystem.xml
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-scrubber-deid/desc/type/CalculationTypeSystem.xml?rev=1500502&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-scrubber-deid/desc/type/CalculationTypeSystem.xml (added)
+++ ctakes/sandbox/ctakes-scrubber-deid/desc/type/CalculationTypeSystem.xml Sun Jul  7 18:57:06 2013
@@ -0,0 +1,44 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+  
+    http://www.apache.org/licenses/LICENSE-2.0
+  
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License.
+-->
+<typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier"> <!-- Declares a single annotation type, org.spin.scrubber.uima.type.Calculation. -->
+  <name>CalculationTypeSystem</name>
+  <description>Type System Definition for Calculation annotation</description>
+  <version>1.0</version>
+  <vendor>The Apache Software Foundation</vendor>
+  <types>
+    <typeDescription>
+      <name>org.spin.scrubber.uima.type.Calculation</name>
+      <description/>
+      <supertypeName>uima.tcas.Annotation</supertypeName>
+      <features> <!-- Two String features: calculationName and calculationValue. -->
+        <featureDescription>
+          <name>calculationName</name>
+          <description/>
+          <rangeTypeName>uima.cas.String</rangeTypeName>
+        </featureDescription>
+        <featureDescription>
+          <name>calculationValue</name>
+          <description/>
+          <rangeTypeName>uima.cas.String</rangeTypeName> <!-- NOTE(review): the value is typed as String rather than a numeric type; confirm this is intended. -->
+        </featureDescription>
+      </features>
+    </typeDescription>
+  </types>
+</typeSystemDescription>

Propchange: ctakes/sandbox/ctakes-scrubber-deid/desc/type/CalculationTypeSystem.xml
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: ctakes/sandbox/ctakes-scrubber-deid/desc/type/KnownPHITypeSystem.xml
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-scrubber-deid/desc/type/KnownPHITypeSystem.xml?rev=1500502&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-scrubber-deid/desc/type/KnownPHITypeSystem.xml (added)
+++ ctakes/sandbox/ctakes-scrubber-deid/desc/type/KnownPHITypeSystem.xml Sun Jul  7 18:57:06 2013
@@ -0,0 +1,49 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+  
+    http://www.apache.org/licenses/LICENSE-2.0
+  
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License.
+-->
+<typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier"> <!-- Declares the KnownPHI annotation type; the reader descriptors under desc/reader/ import this file by relative location. -->
+  <name>KnownPHITypeSystem</name>
+  <description>Type System Definition for KnownPHI annotation</description>
+  <version>1.0</version>
+  <vendor>Harvard Medical School</vendor>
+  <types>
+    <typeDescription>
+      <name>org.spin.scrubber.uima.type.KnownPHI</name> <!-- Same type name the reader descriptors use for their KnownPHIIndex and declare as an output. -->
+      <description/>
+      <supertypeName>uima.tcas.Annotation</supertypeName>
+      <features> <!-- Three String features: code, ontology and content. -->
+        <featureDescription>
+          <name>code</name>
+          <description/>
+          <rangeTypeName>uima.cas.String</rangeTypeName>
+        </featureDescription>
+        <featureDescription>
+          <name>ontology</name>
+          <description/>
+          <rangeTypeName>uima.cas.String</rangeTypeName>
+        </featureDescription>
+        <featureDescription>
+          <name>content</name> <!-- NOTE(review): presumably holds the text of the known-PHI field; confirm against the reader implementation. -->
+          <description/>
+          <rangeTypeName>uima.cas.String</rangeTypeName>
+        </featureDescription>
+      </features>
+    </typeDescription>
+  </types>
+</typeSystemDescription>

Propchange: ctakes/sandbox/ctakes-scrubber-deid/desc/type/KnownPHITypeSystem.xml
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: ctakes/sandbox/ctakes-scrubber-deid/desc/type/OntologyMatchTypeSystem.xml
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-scrubber-deid/desc/type/OntologyMatchTypeSystem.xml?rev=1500502&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-scrubber-deid/desc/type/OntologyMatchTypeSystem.xml (added)
+++ ctakes/sandbox/ctakes-scrubber-deid/desc/type/OntologyMatchTypeSystem.xml Sun Jul  7 18:57:06 2013
@@ -0,0 +1,44 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+  
+    http://www.apache.org/licenses/LICENSE-2.0
+  
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License.
+-->
+<typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier"> <!-- Declares a single annotation type, org.spin.scrubber.uima.type.OntologyMatch. -->
+  <name>OntologyMatchTypeSystem</name>
+  <description>Type System Definition for OntologyMatch annotation</description>
+  <version>1.0</version>
+  <vendor>The Apache Software Foundation</vendor>
+  <types>
+    <typeDescription>
+      <name>org.spin.scrubber.uima.type.OntologyMatch</name>
+      <description/>
+      <supertypeName>uima.tcas.Annotation</supertypeName>
+      <features> <!-- Two String features, code and ontology; the same feature names also appear on the KnownPHI type. -->
+        <featureDescription>
+          <name>code</name>
+          <description/>
+          <rangeTypeName>uima.cas.String</rangeTypeName>
+        </featureDescription>
+        <featureDescription>
+          <name>ontology</name>
+          <description/>
+          <rangeTypeName>uima.cas.String</rangeTypeName>
+        </featureDescription>
+      </features>
+    </typeDescription>
+  </types>
+</typeSystemDescription>

Propchange: ctakes/sandbox/ctakes-scrubber-deid/desc/type/OntologyMatchTypeSystem.xml
------------------------------------------------------------------------------
    svn:mime-type = text/plain



Mime
View raw message