ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From chen...@apache.org
Subject svn commit: r1734445 - in /ctakes/sandbox/ctakes-clinical-deid: ./ src/main/resources/META-INF/org.apache.uima.fit/ src/main/ruta/org/apache/ctakes/deid/ src/test/java/org/apache/ctakes/deid/ src/test/resources/org/apache/ctakes/deid/
Date Thu, 10 Mar 2016 18:52:49 GMT
Author: chenpei
Date: Thu Mar 10 18:52:48 2016
New Revision: 1734445

URL: http://svn.apache.org/viewvc?rev=1734445&view=rev
Log:
CTAKES-384 Applying patch. Thanks Peter Klugl.

Modified:
    ctakes/sandbox/ctakes-clinical-deid/pom.xml
    ctakes/sandbox/ctakes-clinical-deid/src/main/resources/META-INF/org.apache.uima.fit/types.txt
    ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Deid.ruta
    ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Dictionaries.ruta
    ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/UserName.ruta
    ctakes/sandbox/ctakes-clinical-deid/src/test/java/org/apache/ctakes/deid/DeidPipelineTest.java
    ctakes/sandbox/ctakes-clinical-deid/src/test/resources/org/apache/ctakes/deid/examples.csv

Modified: ctakes/sandbox/ctakes-clinical-deid/pom.xml
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/pom.xml?rev=1734445&r1=1734444&r2=1734445&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/pom.xml (original)
+++ ctakes/sandbox/ctakes-clinical-deid/pom.xml Thu Mar 10 18:52:48 2016
@@ -11,24 +11,10 @@
   </parent>
 
   <properties>
+    <uima-version>2.8.1</uima-version>
     <ruta-version>2.4.0</ruta-version>
-<!--     <ruta-version>2.3.1</ruta-version> -->
   </properties>
 
-  <!--  use ruta-2.4.0-rc2 for now until its released -->
-  <repositories>
-    <repository>
-      <id>staged-release</id>
-      <url>https://repository.apache.org/content/repositories/orgapacheuima-1081/</url>
-    </repository>
-  </repositories>
-  <pluginRepositories>
-    <pluginRepository>
-      <id>staged-release</id>
-      <url>https://repository.apache.org/content/repositories/orgapacheuima-1081/</url>
-    </pluginRepository>
-  </pluginRepositories>
-
   <dependencies>
     <dependency>
       <groupId>org.apache.ctakes</groupId>
@@ -45,9 +31,9 @@
       <version>${ruta-version}</version>
     </dependency>
   </dependencies>
-  
-  
-  
+
+
+
   <build>
     <resources>
       <resource>
@@ -60,7 +46,7 @@
         <directory>target/generated-sources/ruta/descriptor</directory>
       </resource>
     </resources>
-  
+
     <plugins>
       <plugin>
         <groupId>org.apache.uima</groupId>
@@ -69,44 +55,40 @@
         <executions>
           <execution>
             <id>descriptors</id>
-            <!-- use this phase so that the ruta script files are already 
-              copied to target/classes -->
-            <!-- no need to specify the included script directories with 
-              <scriptFiles> -->
+            <!-- use this phase so that the ruta script files are already copied to target/classes -->
+            <!-- no need to specify the included script directories with <scriptFiles> -->
             <phase>generate-resources</phase>
             <goals>
               <goal>generate</goal>
             </goals>
             <configuration>
 
-              <!-- This is a exemplary configuration, which explicitly specifies 
-                the default configuration values if not mentioned otherwise. -->
-               <scriptFiles>
+              <!-- This is a exemplary configuration, which explicitly specifies the default configuration 
+                values if not mentioned otherwise. -->
+              <scriptFiles>
                 <directory>${basedir}/src/main/ruta</directory>
                 <includes>
                   <include>**/*.ruta</include>
                 </includes>
-               </scriptFiles>
+              </scriptFiles>
 
-              <!-- The directory where the generated type system descriptors 
-                will be written stored. -->
+              <!-- The directory where the generated type system descriptors will be written stored. -->
               <!-- default value: ${project.build.directory}/generated-sources/ruta/descriptor -->
               <typeSystemOutputDirectory>${project.build.directory}/generated-sources/ruta/descriptor</typeSystemOutputDirectory>
 
-              <!-- The directory where the generated analysis engine descriptors 
-                will be stored. -->
+              <!-- The directory where the generated analysis engine descriptors will be stored. -->
               <!-- default value: ${project.build.directory}/generated-sources/ruta/descriptor -->
               <analysisEngineOutputDirectory>${project.build.directory}/generated-sources/ruta/descriptor</analysisEngineOutputDirectory>
 
-              <!-- The template descriptor for the generated type system. 
-                By default the descriptor of the maven dependency is loaded. -->
+              <!-- The template descriptor for the generated type system. By default the descriptor of 
+                the maven dependency is loaded. -->
               <!-- default value: none -->
               <!-- not used in this example <typeSystemTemplate>...</typeSystemTemplate> -->
 
-              <!-- The template descriptor for the generated analysis engine. 
-                By default the descriptor of the maven dependency is loaded. -->
+              <!-- The template descriptor for the generated analysis engine. By default the descriptor 
+                of the maven dependency is loaded. -->
               <!-- default value: none -->
-              <!-- not used in this example <analysisEngineTemplate>...</analysisEngineTemplate> -->
+              <analysisEngineTemplate>src/main/resources/template/BasicEngine.xml</analysisEngineTemplate>
 
               <!-- Script paths of the generated analysis engine descriptor. -->
               <!-- default value: none -->
@@ -147,8 +129,8 @@
               <!-- default value: false -->
               <resolveImports>false</resolveImports>
 
-              <!-- Amount of retries for building dependent descriptors. 
-                Default value -1 leads to three retires for each script. -->
+              <!-- Amount of retries for building dependent descriptors. Default value -1 leads to three 
+                retires for each script. -->
               <!-- default value: -1 -->
               <maxBuildRetries>-1</maxBuildRetries>
 
@@ -166,8 +148,8 @@
               <!-- default value: none -->
               <buildPaths>
                 <buildPath>script:src/main/ruta/</buildPath>
-<!--                 <buildPath>descriptor:target/generated-sources/ruta/descriptor/</buildPath> -->
-<!--                 <buildPath>resources:src/main/resources/wordlists</buildPath> -->
+                <!-- <buildPath>descriptor:target/generated-sources/ruta/descriptor/</buildPath> -->
+                <!-- <buildPath>resources:src/main/resources/wordlists</buildPath> -->
               </buildPaths>
 
             </configuration>
@@ -192,6 +174,40 @@
           </execution>
         </executions>
       </plugin>
+      <plugin>
+            <groupId>org.codehaus.mojo</groupId>
+            <artifactId>jaxb2-maven-plugin</artifactId>
+            <version>2.2</version>
+            <executions>
+                <execution>
+                    <id>xjc</id>
+                    <goals>
+                        <goal>xjc</goal>
+                    </goals>
+                </execution>
+            </executions>
+            <configuration>
+                <packageName>org.apache.ctakes.deid.i2b2</packageName>
+            </configuration>
+        </plugin>
+        <plugin>
+        <groupId>org.apache.uima</groupId>
+        <artifactId>jcasgen-maven-plugin</artifactId>
+        <version>${uima-version}</version>
+        <executions>
+          <execution>
+            <goals>
+              <goal>generate</goal>
+            </goals>
+            <configuration>
+              <typeSystemIncludes>
+                <typeSystemInclude>src/main/resources/org/apache/ctakes/deid/types/TypeSystem.xml</typeSystemInclude>
+              </typeSystemIncludes>
+              <limitToProject>true</limitToProject>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
     </plugins>
   </build>
   <dependencyManagement>
@@ -202,8 +218,8 @@
         <version>2.8.1</version>
       </dependency>
       <dependency>
-         <groupId>xml-apis</groupId>
-         <artifactId>xml-apis</artifactId>
+        <groupId>xml-apis</groupId>
+        <artifactId>xml-apis</artifactId>
         <version>1.4.01</version>
       </dependency>
     </dependencies>

Modified: ctakes/sandbox/ctakes-clinical-deid/src/main/resources/META-INF/org.apache.uima.fit/types.txt
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/src/main/resources/META-INF/org.apache.uima.fit/types.txt?rev=1734445&r1=1734444&r2=1734445&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/src/main/resources/META-INF/org.apache.uima.fit/types.txt (original)
+++ ctakes/sandbox/ctakes-clinical-deid/src/main/resources/META-INF/org.apache.uima.fit/types.txt Thu Mar 10 18:52:48 2016
@@ -7,4 +7,5 @@ classpath*:org/apache/ctakes/drugner/typ
 classpath*:org/apache/ctakes/padtermspotter/types/TypeSystem.xml
 classpath*:org/apache/ctakes/smokingstatus/types/TypeSystem.xml
 classpath*:org/apache/ctakes/sideeffect/types/TypeSystem.xml
+classpath*:org/apache/ctakes/deid/types/TypeSystem.xml
 classpath*:org/apache/ctakes/deid/DeidRutaTypeSystem.xml
\ No newline at end of file

Modified: ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Deid.ruta
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Deid.ruta?rev=1734445&r1=1734444&r2=1734445&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Deid.ruta (original)
+++ ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Deid.ruta Thu Mar 10 18:52:48 2016
@@ -1,22 +1,46 @@
 PACKAGE org.apache.ctakes.deid;
 
-TYPESYSTEM org.apache.ctakes.typesystem.types.TypeSystem;
+//TYPESYSTEM org.apache.ctakes.typesystem.types.TypeSystem;
+TYPESYSTEM org.apache.ctakes.deid.types.TypeSystem;
+
+// UIMA-4833
+TYPESYSTEM org.apache.ctakes.deid.ZipStateRutaTypeSystem;
+TYPESYSTEM org.apache.ctakes.deid.StreetRutaTypeSystem;
+TYPESYSTEM org.apache.ctakes.deid.AgeRutaTypeSystem;
+TYPESYSTEM org.apache.ctakes.deid.DoctorRutaTypeSystem;
+TYPESYSTEM org.apache.ctakes.deid.UserNameRutaTypeSystem;
+TYPESYSTEM org.apache.ctakes.deid.PhoneRutaTypeSystem;
+TYPESYSTEM org.apache.ctakes.deid.DateRutaTypeSystem;
+
 SCRIPT org.apache.ctakes.deid.Dictionaries;
+SCRIPT org.apache.ctakes.deid.Age;
+SCRIPT org.apache.ctakes.deid.Doctor;
 SCRIPT org.apache.ctakes.deid.ZipState;
 SCRIPT org.apache.ctakes.deid.Street;
 SCRIPT org.apache.ctakes.deid.UserName;
+SCRIPT org.apache.ctakes.deid.Phone;
+SCRIPT org.apache.ctakes.deid.Date;
 
 CALL(Dictionaries);
 CALL(ZipState);
 CALL(Street);
 CALL(UserName);
+CALL(Date);
+CALL(Age);
+CALL(Doctor);
+CALL(Phone);
+
+Zip{-> Location, Location.entityType = "ZIP"};
+State{-> Location, Location.entityType= "STATE"};
+Email{-> Contact, Contact.entityType = "EMAIL"};
+ProfessionInd{-> Profession, Profession.entityType = "PROFESSION"};
+Url{-> Contact, Contact.entityType = "URL"};
+Street{-> Location, Location.entityType= "STREET"};
+UserName{-> Name, Name.entityType = "USERNAME"};
+Age{-> Age.entityType = "AGE"};
+Doctor{-> Name, Name.entityType = "DOCTOR"};
+Phone{-> Contact, Contact.entityType = "PHONE"};
+Date{-> Date.entityType = "DATE"};
+
 
-// map types of ruta scripts to cTAKES types
-// TODO select the correct types and fill the features
-Zip{-> IdentifiedAnnotation};
-State{-> IdentifiedAnnotation};
-Email{-> IdentifiedAnnotation};
-Url{-> IdentifiedAnnotation};
-Street{-> IdentifiedAnnotation};
-UserName{-> IdentifiedAnnotation};
 

Modified: ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Dictionaries.ruta
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Dictionaries.ruta?rev=1734445&r1=1734444&r2=1734445&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Dictionaries.ruta (original)
+++ ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Dictionaries.ruta Thu Mar 10 18:52:48 2016
@@ -2,19 +2,49 @@ PACKAGE org.apache.ctakes.deid;
 
 WORDLIST trie = 'generated.mtwl';
 DECLARE KeywordInd; 
-DECLARE KeywordInd Profession, StateContext;
-DECLARE KeywordInd StreetInd, StreetFullInd;
+DECLARE KeywordInd ProfessionInd, StateContext, DeceasedInd, FamilyInd, MonthInd;
+DECLARE KeywordInd StreetInd, StreetFullInd, AgePostInd, AgePreInd, PhonePreInd;
 
 TRIE(
-    "profession.txt" = Profession,
+    "profession.txt" = ProfessionInd,
     "us_state.txt" = StateContext,
     "us_state_acronym_abbreviation.txt" = StateContext,
     "street_ind.txt" = StreetInd,
     "street_full_ind.txt" = StreetFullInd,
+    "age_post_ind.txt" = AgePostInd,
+    "age_pre_ind.txt" = AgePreInd,
+    "deceased_ind.txt" = DeceasedInd,
+    "family_ind" = FamilyInd,
+    "phone_pre_ind" = PhonePreInd,
+    "month_ind" = MonthInd,
     trie, true, 4, false, 0, "-");
-    
 
 DECLARE Url, Email;
 "[_A-Za-z0-9-\\+]+(\\.[_A-Za-z0-9- ]+)*@[A-Za-z0-9-]+(\\.[A-Za-z0-9]+)*(\\.(com|org|edu|gov|mil|co\\.uk))" -> Email;
 "(https?://)?(www.)([a-zA-Z0-9]+).[a-zA-Z0-9]*.[a-z]{3}.?([a-z]+)?(/[a-zA-Z0-9]+)?|(https?|ftp)://[^\\s/$.?#].[^\\s]*|www.[^\\s/$.?#].[^\\s]*" -> Url;
 
+DECLARE MDInd;
+"M\\.D\\."-> MDInd;
+
+DECLARE Num1, Num12, Num2, Num3, Num34, Num4, Num5;
+
+NUM->{
+    Document{REGEXP(".")-> Num1};
+	Document{REGEXP("..?")-> Num12};
+	Document{REGEXP("..")-> Num2};
+	Document{REGEXP("...")-> Num3};
+	Document{REGEXP("....?")-> Num34};
+	Document{REGEXP("....")-> Num4};
+	Document{REGEXP(".....")-> Num5};
+};
+
+DECLARE LParen, RParen, Dash, Slash;
+SPECIAL-> {
+    Document.ct=="("{-> LParen};
+    Document.ct==")"{-> RParen};
+    Document.ct=="-"{-> Dash};
+    Document.ct=="/"{-> Slash};
+};
+
+DECLARE ApoInd;
+(SPECIAL.ct=="'" SW.ct=="s"){-> ApoInd};
\ No newline at end of file

Modified: ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/UserName.ruta
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/UserName.ruta?rev=1734445&r1=1734444&r2=1734445&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/UserName.ruta (original)
+++ ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/UserName.ruta Thu Mar 10 18:52:48 2016
@@ -1,4 +1,5 @@
 PACKAGE org.apache.ctakes.deid;
+TYPESYSTEM org.apache.ctakes.deid.DictionariesRutaTypeSystem;
 
 DECLARE UserName;
 //getUSERNAME 1
@@ -6,11 +7,7 @@ RETAINTYPE(WS);
 SPECIAL.ct=="[" 
 	(W{REGEXP(".{2,3}")} @NUM{REGEXP(".{1,3}")}){-> UserName} 
 	SPECIAL.ct=="]" ;
+MDInd WS+ W{REGEXP(".{2}"), -REGEXP("[Oo]n")} NUM{REGEXP(".{1,3}")->MARK(UserName,3,4)};
 RETAINTYPE;
 //getUSERNAME2
 
-DECLARE MDInd;
-"M\\.D\\."-> MDInd;
-MDInd W{REGEXP(".{2,3}")} NUM{REGEXP(".{1,3}")->MARK(UserName,2,3)};
-MDInd W{REGEXP("[Oo]n")} NUM{REGEXP(".{1,3}")->UNMARK(UserName,2,3)};
-W{REGEXP("[Oo]n")} @NUM{REGEXP(".{1,3}")->UNMARK(UserName,1,2)};

Modified: ctakes/sandbox/ctakes-clinical-deid/src/test/java/org/apache/ctakes/deid/DeidPipelineTest.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/src/test/java/org/apache/ctakes/deid/DeidPipelineTest.java?rev=1734445&r1=1734444&r2=1734445&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/src/test/java/org/apache/ctakes/deid/DeidPipelineTest.java (original)
+++ ctakes/sandbox/ctakes-clinical-deid/src/test/java/org/apache/ctakes/deid/DeidPipelineTest.java Thu Mar 10 18:52:48 2016
@@ -23,9 +23,7 @@ import java.io.InputStreamReader;
 import java.net.URL;
 import java.util.Collection;
 
-import junit.framework.Assert;
-
-import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.ctakes.deid.type.DeidEntity;
 import org.apache.uima.fit.factory.AggregateBuilder;
 import org.apache.uima.fit.factory.AnalysisEngineFactory;
 import org.apache.uima.fit.factory.JCasFactory;
@@ -34,6 +32,8 @@ import org.apache.uima.fit.util.JCasUtil
 import org.apache.uima.jcas.JCas;
 import org.junit.Test;
 
+import junit.framework.Assert;
+
 public class DeidPipelineTest {
 
   private String descriptorPath = "target/generated-sources/ruta/descriptor/org/apache/ctakes/deid/DeidRutaAnnotator.xml";
@@ -58,11 +58,11 @@ public class DeidPipelineTest {
       jcas.setDocumentText(documentText);
 
       SimplePipeline.runPipeline(jcas, builder.createAggregateDescription());
-      Collection<IdentifiedAnnotation> select = JCasUtil.select(jcas, IdentifiedAnnotation.class);
+      Collection<DeidEntity> select = JCasUtil.select(jcas, DeidEntity.class);
       Assert.assertEquals(documentText, split.length - 1, select.size());
       int counter = 1;
-      for (IdentifiedAnnotation identifiedAnnotation : select) {
-        String actual = identifiedAnnotation.getCoveredText();
+      for (DeidEntity each : select) {
+        String actual = each.getCoveredText();
         String expected = split[counter];
         Assert.assertEquals(expected, actual);
         counter++;

Modified: ctakes/sandbox/ctakes-clinical-deid/src/test/resources/org/apache/ctakes/deid/examples.csv
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/src/test/resources/org/apache/ctakes/deid/examples.csv?rev=1734445&r1=1734444&r2=1734445&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/src/test/resources/org/apache/ctakes/deid/examples.csv (original)
+++ ctakes/sandbox/ctakes-clinical-deid/src/test/resources/org/apache/ctakes/deid/examples.csv Thu Mar 10 18:52:48 2016
@@ -7,6 +7,6 @@ some text Mass 12345-1234 more text;Mass
 some text 742 Evergreen Terrace some text;742 Evergreen Terrace
 some text 742 Lower Evergreen Terrace some text;742 Lower Evergreen Terrace
 some text Evergreen street some text;Evergreen street
-some text M.D. abc 123 some text;abc 123
+some text M.D. ab123 some text;ab123
 some text [ab123] some text;ab123
 some text on 123 some text;
\ No newline at end of file



Mime
View raw message