ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From chen...@apache.org
Subject svn commit: r1734582 - in /ctakes/sandbox/ctakes-clinical-deid/src: main/java/ main/java/org/ main/java/org/apache/ main/java/org/apache/ctakes/ main/java/org/apache/ctakes/deid/ main/resources/org/ main/resources/org/apache/ main/resources/org/apache/...
Date Fri, 11 Mar 2016 15:50:47 GMT
Author: chenpei
Date: Fri Mar 11 15:50:47 2016
New Revision: 1734582

URL: http://svn.apache.org/viewvc?rev=1734582&view=rev
Log:
CTAKES-384 Applying patch 20160311.patch. Thanks Peter Klugl.

Added:
    ctakes/sandbox/ctakes-clinical-deid/src/main/java/
    ctakes/sandbox/ctakes-clinical-deid/src/main/java/org/
    ctakes/sandbox/ctakes-clinical-deid/src/main/java/org/apache/
    ctakes/sandbox/ctakes-clinical-deid/src/main/java/org/apache/ctakes/
    ctakes/sandbox/ctakes-clinical-deid/src/main/java/org/apache/ctakes/deid/
    ctakes/sandbox/ctakes-clinical-deid/src/main/java/org/apache/ctakes/deid/I2B2DeidCollectionReader.java
    ctakes/sandbox/ctakes-clinical-deid/src/main/java/org/apache/ctakes/deid/SimpleDeidEntityComparator.java
    ctakes/sandbox/ctakes-clinical-deid/src/main/resources/org/
    ctakes/sandbox/ctakes-clinical-deid/src/main/resources/org/apache/
    ctakes/sandbox/ctakes-clinical-deid/src/main/resources/org/apache/ctakes/
    ctakes/sandbox/ctakes-clinical-deid/src/main/resources/org/apache/ctakes/deid/
    ctakes/sandbox/ctakes-clinical-deid/src/main/resources/org/apache/ctakes/deid/types/
    ctakes/sandbox/ctakes-clinical-deid/src/main/resources/org/apache/ctakes/deid/types/TypeSystem.xml
    ctakes/sandbox/ctakes-clinical-deid/src/main/resources/template/
    ctakes/sandbox/ctakes-clinical-deid/src/main/resources/template/BasicEngine.xml
    ctakes/sandbox/ctakes-clinical-deid/src/main/resources/wordlists/age_post_ind.txt
    ctakes/sandbox/ctakes-clinical-deid/src/main/resources/wordlists/age_pre_ind.txt
    ctakes/sandbox/ctakes-clinical-deid/src/main/resources/wordlists/deceased_ind.txt
    ctakes/sandbox/ctakes-clinical-deid/src/main/resources/wordlists/family_ind.txt
    ctakes/sandbox/ctakes-clinical-deid/src/main/resources/wordlists/month_ind.txt
    ctakes/sandbox/ctakes-clinical-deid/src/main/resources/wordlists/phone_pre_ind.txt
    ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Age.ruta
    ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Date.ruta
    ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Doctor.ruta
    ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Phone.ruta
    ctakes/sandbox/ctakes-clinical-deid/src/main/xsd/
    ctakes/sandbox/ctakes-clinical-deid/src/main/xsd/i2b2.xsd
    ctakes/sandbox/ctakes-clinical-deid/src/test/java/org/apache/ctakes/deid/I2B2Evaluation.java

Added: ctakes/sandbox/ctakes-clinical-deid/src/main/java/org/apache/ctakes/deid/I2B2DeidCollectionReader.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/src/main/java/org/apache/ctakes/deid/I2B2DeidCollectionReader.java?rev=1734582&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/src/main/java/org/apache/ctakes/deid/I2B2DeidCollectionReader.java (added)
+++ ctakes/sandbox/ctakes-clinical-deid/src/main/java/org/apache/ctakes/deid/I2B2DeidCollectionReader.java Fri Mar 11 15:50:47 2016
@@ -0,0 +1,183 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.ctakes.deid;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Collection;
+import java.util.Iterator;
+import java.util.List;
+
+import javax.xml.bind.JAXBContext;
+import javax.xml.bind.Unmarshaller;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.ctakes.deid.i2b2.DeIdi2B2;
+import org.apache.ctakes.deid.i2b2.DeIdi2B2.TAGS;
+import org.apache.ctakes.deid.i2b2.DeIdi2B2.TAGS.AGE;
+import org.apache.ctakes.deid.i2b2.DeIdi2B2.TAGS.CONTACT;
+import org.apache.ctakes.deid.i2b2.DeIdi2B2.TAGS.DATE;
+import org.apache.ctakes.deid.i2b2.DeIdi2B2.TAGS.LOCATION;
+import org.apache.ctakes.deid.i2b2.DeIdi2B2.TAGS.NAME;
+import org.apache.ctakes.deid.i2b2.DeIdi2B2.TAGS.PROFESSION;
+import org.apache.ctakes.deid.type.Age;
+import org.apache.ctakes.deid.type.Date;
+import org.apache.ctakes.deid.type.DeidEntity;
+import org.apache.ctakes.deid.type.Location;
+import org.apache.ctakes.deid.type.Name;
+import org.apache.ctakes.deid.type.Profession;
+import org.apache.ctakes.typesystem.type.structured.DocumentID;
+import org.apache.uima.UimaContext;
+import org.apache.uima.cas.CASException;
+import org.apache.uima.collection.CollectionException;
+import org.apache.uima.fit.component.JCasCollectionReader_ImplBase;
+import org.apache.uima.fit.descriptor.ConfigurationParameter;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.util.Progress;
+import org.apache.uima.util.ProgressImpl;
+
+/**
+ * Collection reader for i2b2 de-identification XML documents.
+ * <p>
+ * All files below {@link #PARAM_INPUT_DIRECTORY} (searched recursively) whose extension is listed
+ * in {@link #PARAM_FILE_EXTENSIONS} are unmarshalled with JAXB into {@link DeIdi2B2}. For each
+ * file the document text and a {@link DocumentID} (the file name) are stored in the CAS, and every
+ * tag of the document is added as a {@link DeidEntity} annotation to the view named by
+ * {@link #PARAM_GOLD_VIEW}. If that view is not the view handed to {@link #getNext(JCas)}, it is
+ * created and receives its own copy of the text and document id.
+ */
+public class I2B2DeidCollectionReader extends JCasCollectionReader_ImplBase {
+
+  /** Name of the parameter holding the directory that is scanned for input files. */
+  public static final String PARAM_INPUT_DIRECTORY = "inputDirectory";
+
+  @ConfigurationParameter(name = PARAM_INPUT_DIRECTORY, mandatory = true)
+  private File inputDirectory;
+
+  /** Name of the parameter holding the accepted file extensions (without leading dot). */
+  public static final String PARAM_FILE_EXTENSIONS = "fileExtensions";
+
+  @ConfigurationParameter(name = PARAM_FILE_EXTENSIONS, mandatory = true, defaultValue = "xml")
+  private String[] fileExtensions;
+
+  /** Name of the parameter holding the view that receives the gold annotations. */
+  public static final String PARAM_GOLD_VIEW = "goldView";
+
+  @ConfigurationParameter(name = PARAM_GOLD_VIEW, mandatory = true, defaultValue = "_InitialView")
+  private String goldView;
+
+  /** Iterates over the files found during {@link #initialize(UimaContext)}. */
+  private Iterator<File> iterator;
+
+  /** Number of files handed out so far. */
+  private int current = 0;
+
+  /** Total number of files found. */
+  private int overall = 0;
+
+  /** Built once in {@link #initialize(UimaContext)} and reused for every document. */
+  private JAXBContext jaxbContext;
+
+  /**
+   * Creates the JAXB context and collects the input files.
+   *
+   * @throws ResourceInitializationException
+   *           if the super class fails to initialize or the JAXB context cannot be created
+   */
+  @Override
+  public void initialize(UimaContext context) throws ResourceInitializationException {
+    super.initialize(context);
+    try {
+      // Building a JAXBContext is expensive, so do it once instead of once per document.
+      jaxbContext = JAXBContext.newInstance(DeIdi2B2.class);
+    } catch (javax.xml.bind.JAXBException e) {
+      throw new ResourceInitializationException(e);
+    }
+    Collection<File> files = FileUtils.listFiles(inputDirectory, fileExtensions, true);
+    overall = files.size();
+    iterator = files.iterator();
+  }
+
+  /**
+   * Reads the next file into the given CAS.
+   *
+   * @param jcas
+   *          the CAS to fill
+   * @throws CollectionException
+   *           if the file cannot be unmarshalled, the gold view cannot be created, or the
+   *           document contains a tag element this reader does not support
+   */
+  @Override
+  public void getNext(JCas jcas) throws IOException, CollectionException {
+    File currentFile = iterator.next();
+    current++;
+
+    DeIdi2B2 doc;
+    try {
+      // An Unmarshaller is created per document; only the context is shared.
+      Unmarshaller unmarshaller = jaxbContext.createUnmarshaller();
+      doc = (DeIdi2B2) unmarshaller.unmarshal(currentFile);
+    } catch (Exception e) {
+      throw new CollectionException(e);
+    }
+
+    jcas.setDocumentText(doc.getTEXT());
+    addDocumentId(jcas, currentFile);
+
+    JCas goldJCas = jcas;
+    if (!StringUtils.equals(goldView, jcas.getViewName())) {
+      // The gold annotations live in a separate view that mirrors text and document id.
+      try {
+        goldJCas = jcas.createView(goldView);
+        goldJCas.setDocumentText(doc.getTEXT());
+        addDocumentId(goldJCas, currentFile);
+      } catch (CASException e) {
+        throw new CollectionException(e);
+      }
+    }
+
+    TAGS tags = doc.getTAGS();
+    if (tags == null) {
+      // No tag container in this document: nothing to annotate.
+      return;
+    }
+    List<Object> tagElements = tags.getDATEOrAGEOrNAME();
+    for (Object tagElement : tagElements) {
+      createDeidEntity(goldJCas, tagElement);
+    }
+  }
+
+  /** Stores the name of the given file as {@link DocumentID} in the given view. */
+  private void addDocumentId(JCas jcas, File currentFile) {
+    DocumentID docId = new DocumentID(jcas);
+    docId.setDocumentID(currentFile.getName());
+    docId.addToIndexes();
+  }
+
+  @Override
+  public boolean hasNext() throws IOException, CollectionException {
+    return iterator.hasNext();
+  }
+
+  /** Reports the number of files handed out so far relative to the number of files found. */
+  @Override
+  public Progress[] getProgress() {
+    return new Progress[] { new ProgressImpl(current, overall, Progress.ENTITIES) };
+  }
+
+  /**
+   * Converts one unmarshalled tag element into the corresponding {@link DeidEntity} subtype and
+   * adds it to the indexes of the given view.
+   *
+   * @throws CollectionException
+   *           if the element is {@code null} or of a class this reader does not handle; before,
+   *           this case surfaced as a bare NullPointerException
+   */
+  private void createDeidEntity(JCas jcas, Object object) throws CollectionException {
+    DeidEntity entity;
+    if (object instanceof AGE) {
+      AGE element = (AGE) object;
+      entity = new Age(jcas, element.getStart().intValue(), element.getEnd().intValue());
+      describe(entity, element.getId(), element.getTYPE(), element.getComment());
+    } else if (object instanceof DATE) {
+      DATE element = (DATE) object;
+      entity = new Date(jcas, element.getStart().intValue(), element.getEnd().intValue());
+      describe(entity, element.getId(), element.getTYPE(), element.getComment());
+    } else if (object instanceof LOCATION) {
+      LOCATION element = (LOCATION) object;
+      entity = new Location(jcas, element.getStart().intValue(), element.getEnd().intValue());
+      describe(entity, element.getId(), element.getTYPE(), element.getComment());
+    } else if (object instanceof NAME) {
+      NAME element = (NAME) object;
+      entity = new Name(jcas, element.getStart().intValue(), element.getEnd().intValue());
+      describe(entity, element.getId(), element.getTYPE(), element.getComment());
+    } else if (object instanceof CONTACT) {
+      CONTACT element = (CONTACT) object;
+      // CONTACT tags map to the Contact type of the deid type system, not to Name.
+      // NOTE(review): assumes the Contact cover class is generated next to Age, Date, ... - confirm.
+      entity = new org.apache.ctakes.deid.type.Contact(jcas, element.getStart().intValue(),
+              element.getEnd().intValue());
+      describe(entity, element.getId(), element.getTYPE(), element.getComment());
+    } else if (object instanceof PROFESSION) {
+      PROFESSION element = (PROFESSION) object;
+      entity = new Profession(jcas, element.getStart().intValue(), element.getEnd().intValue());
+      describe(entity, element.getId(), element.getTYPE(), element.getComment());
+    } else {
+      String elementClass = object == null ? "null" : object.getClass().getName();
+      throw new CollectionException(
+              new IllegalArgumentException("Unsupported i2b2 tag element: " + elementClass));
+    }
+    entity.addToIndexes();
+  }
+
+  /** Copies the attributes shared by all tag elements onto the given annotation. */
+  private static void describe(DeidEntity entity, String id, String entityType, String comment) {
+    entity.setId(id);
+    entity.setEntityType(entityType);
+    entity.setComment(comment);
+  }
+
+}

Added: ctakes/sandbox/ctakes-clinical-deid/src/main/java/org/apache/ctakes/deid/SimpleDeidEntityComparator.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/src/main/java/org/apache/ctakes/deid/SimpleDeidEntityComparator.java?rev=1734582&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/src/main/java/org/apache/ctakes/deid/SimpleDeidEntityComparator.java (added)
+++ ctakes/sandbox/ctakes-clinical-deid/src/main/java/org/apache/ctakes/deid/SimpleDeidEntityComparator.java Fri Mar 11 15:50:47 2016
@@ -0,0 +1,197 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.ctakes.deid;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Locale;
+import java.util.Map;
+import java.util.TreeMap;
+import java.util.TreeSet;
+
+import org.apache.ctakes.core.util.DocumentIDAnnotationUtil;
+import org.apache.ctakes.deid.type.DeidEntity;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.CASException;
+import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
+import org.apache.uima.fit.descriptor.ConfigurationParameter;
+import org.apache.uima.fit.util.JCasUtil;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.ruta.type.FalseNegative;
+import org.apache.uima.ruta.type.FalsePositive;
+import org.apache.uima.ruta.type.TruePositive;
+import org.apache.uima.util.CasCopier;
+
+/**
+ * Compares the {@link DeidEntity} annotations of the processed view with those of a gold view and
+ * prints precision, recall and F1 to standard output: one line per document in
+ * {@link #process(JCas)} and, in {@link #collectionProcessComplete()}, one line per annotation
+ * type plus an "OVERALL" line computed from the summed counts.
+ * <p>
+ * A gold and a processed annotation match when their type name, begin, end and entityType are
+ * all equal.
+ */
+public class SimpleDeidEntityComparator extends JCasAnnotator_ImplBase {
+
+  /** Name of the parameter holding the view that contains the gold annotations. */
+  public static final String PARAM_GOLD_VIEW = "goldView";
+
+  @ConfigurationParameter(name = PARAM_GOLD_VIEW, mandatory = true, defaultValue = "gold")
+  private String goldView;
+
+  /** Name of the parameter that switches on the creation of Ruta evaluation annotations. */
+  public static final String PARAM_CREATE_RUTA_EVAL_ANNOTATIONS = "createRutaEvalAnnotations";
+
+  @ConfigurationParameter(name = PARAM_CREATE_RUTA_EVAL_ANNOTATIONS, mandatory = true, defaultValue = "false")
+  private Boolean createRutaEvalAnnotations;
+
+  /** True positive / false positive / false negative counts keyed by the type's short name. */
+  private final Map<String, Integer> type2tp = new TreeMap<>();
+  private final Map<String, Integer> type2fp = new TreeMap<>();
+  private final Map<String, Integer> type2fn = new TreeMap<>();
+
+  /**
+   * Classifies the annotations of one document into true positives, false positives and false
+   * negatives, prints the document scores and updates the per-type counters. If
+   * {@link #PARAM_CREATE_RUTA_EVAL_ANNOTATIONS} is set, a TruePositive, FalsePositive or
+   * FalseNegative annotation is added to the processed view for each classified annotation.
+   *
+   * @throws AnalysisEngineProcessException
+   *           if the gold view cannot be accessed
+   */
+  @Override
+  public void process(JCas jcas) throws AnalysisEngineProcessException {
+
+    String documentID = DocumentIDAnnotationUtil.getDocumentID(jcas);
+
+    JCas goldJCas;
+    try {
+      goldJCas = jcas.getView(goldView);
+    } catch (CASException e) {
+      throw new AnalysisEngineProcessException(e);
+    }
+
+    Collection<DeidEntity> allGold = JCasUtil.select(goldJCas, DeidEntity.class);
+    Collection<DeidEntity> allProcess = JCasUtil.select(jcas, DeidEntity.class);
+
+    Collection<DeidEntity> tp = new ArrayList<>();
+    Collection<DeidEntity> fp = new ArrayList<>();
+    Collection<DeidEntity> fn = new ArrayList<>();
+
+    // Missed gold annotations are copied over so they can be referenced from the processed view.
+    CasCopier goldToProcess = new CasCopier(goldJCas.getCas(), jcas.getCas());
+
+    for (DeidEntity goldAnnotation : allGold) {
+      DeidEntity match = findMatch(goldAnnotation, allProcess);
+      if (match != null) {
+        tp.add(match);
+        inc(type2tp, match);
+      } else {
+        DeidEntity missed = (DeidEntity) goldToProcess.copyFs(goldAnnotation);
+        fn.add(missed);
+        inc(type2fn, missed);
+      }
+    }
+
+    for (DeidEntity processAnnotation : allProcess) {
+      if (findMatch(processAnnotation, allGold) == null) {
+        fp.add(processAnnotation);
+        inc(type2fp, processAnnotation);
+      }
+    }
+
+    printResult(documentID, tp.size(), fp.size(), fn.size());
+
+    if (createRutaEvalAnnotations) {
+      for (DeidEntity each : tp) {
+        TruePositive a = new TruePositive(jcas, each.getBegin(), each.getEnd());
+        a.setOriginal(each);
+        a.addToIndexes();
+      }
+      for (DeidEntity each : fp) {
+        FalsePositive a = new FalsePositive(jcas, each.getBegin(), each.getEnd());
+        a.setOriginal(each);
+        a.addToIndexes();
+      }
+      for (DeidEntity each : fn) {
+        FalseNegative a = new FalseNegative(jcas, each.getBegin(), each.getEnd());
+        a.setOriginal(each);
+        a.addToIndexes();
+      }
+    }
+  }
+
+  /** Prints the scores of every annotation type seen so far, followed by the overall scores. */
+  @Override
+  public void collectionProcessComplete() throws AnalysisEngineProcessException {
+    super.collectionProcessComplete();
+    int tps = 0;
+    int fps = 0;
+    int fns = 0;
+    Collection<String> types = new TreeSet<>();
+    types.addAll(type2tp.keySet());
+    types.addAll(type2fp.keySet());
+    types.addAll(type2fn.keySet());
+
+    for (String type : types) {
+      int tp = countOf(type2tp, type);
+      int fp = countOf(type2fp, type);
+      int fn = countOf(type2fn, type);
+      tps += tp;
+      fps += fp;
+      fns += fn;
+      printResult(type, tp, fp, fn);
+    }
+
+    printResult("OVERALL", tps, fps, fns);
+  }
+
+  /**
+   * Returns the first candidate that matches the given annotation, or {@code null} if there is
+   * none.
+   */
+  private static DeidEntity findMatch(DeidEntity annotation, Collection<DeidEntity> candidates) {
+    for (DeidEntity candidate : candidates) {
+      if (sameEntity(annotation, candidate)) {
+        return candidate;
+      }
+    }
+    return null;
+  }
+
+  /**
+   * Tells whether two annotations agree in type name, begin, end and entityType. The entityType
+   * comparison is null-safe: two unset entity types are equal, an unset and a set one are not.
+   */
+  private static boolean sameEntity(DeidEntity first, DeidEntity second) {
+    if (first.getBegin() != second.getBegin() || first.getEnd() != second.getEnd()) {
+      return false;
+    }
+    if (!first.getType().getName().equals(second.getType().getName())) {
+      return false;
+    }
+    String firstEntityType = first.getEntityType();
+    String secondEntityType = second.getEntityType();
+    if (firstEntityType == null) {
+      return secondEntityType == null;
+    }
+    return firstEntityType.equals(secondEntityType);
+  }
+
+  /**
+   * Prints precision, recall and F1 for the given counts as one line on standard output.
+   * Precision and recall default to 1 when their denominator is 0; F1 is 0 when both are 0.
+   */
+  private static void printResult(String doc, double tpCount, double fpCount, double fnCount) {
+    double precision = 1;
+    if (tpCount + fpCount != 0) {
+      precision = tpCount / (tpCount + fpCount);
+    }
+    double recall = 1;
+    if (tpCount + fnCount != 0) {
+      recall = tpCount / (tpCount + fnCount);
+    }
+    double f1 = 0;
+    if (precision + recall != 0) {
+      f1 = 2 * (precision * recall) / (precision + recall);
+    }
+
+    System.out.printf(Locale.ENGLISH, "%-10s\tp: %.2f\tr: %.2f\tf1: %.2f", doc, precision, recall, f1);
+    System.out.println();
+  }
+
+  /** Returns the count stored for the given key, or 0 if the key is absent. */
+  private static int countOf(Map<String, Integer> map, String key) {
+    Integer count = map.get(key);
+    return count == null ? 0 : count;
+  }
+
+  /** Increments the counter kept under the short name of the annotation's type. */
+  private void inc(Map<String, Integer> map, DeidEntity annotation) {
+    String name = annotation.getType().getShortName();
+    map.put(name, countOf(map, name) + 1);
+  }
+
+}

Added: ctakes/sandbox/ctakes-clinical-deid/src/main/resources/org/apache/ctakes/deid/types/TypeSystem.xml
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/src/main/resources/org/apache/ctakes/deid/types/TypeSystem.xml?rev=1734582&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/src/main/resources/org/apache/ctakes/deid/types/TypeSystem.xml (added)
+++ ctakes/sandbox/ctakes-clinical-deid/src/main/resources/org/apache/ctakes/deid/types/TypeSystem.xml Fri Mar 11 15:50:47 2016
@@ -0,0 +1,66 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Type system for clinical de-identification: DeidEntity is the common annotation supertype
+  carrying the id, entityType and comment features; each PHI category is a subtype of it.
+-->
+<typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
+  <name>org.apache.ctakes.deid.types.TypeSystem</name>
+  <description>This is an Apache cTAKES Type System for clinical deidentification.</description>
+  <version>1.0</version>
+  <vendor>Apache cTAKES</vendor>
+  <types>
+    <typeDescription>
+      <name>org.apache.ctakes.deid.type.Date</name>
+      <description/>
+      <supertypeName>org.apache.ctakes.deid.type.DeidEntity</supertypeName>
+    </typeDescription>
+    <!-- Age is declared exactly once; a second, identical declaration was redundant. -->
+    <typeDescription>
+      <name>org.apache.ctakes.deid.type.Age</name>
+      <description/>
+      <supertypeName>org.apache.ctakes.deid.type.DeidEntity</supertypeName>
+    </typeDescription>
+    <typeDescription>
+      <name>org.apache.ctakes.deid.type.Location</name>
+      <description/>
+      <supertypeName>org.apache.ctakes.deid.type.DeidEntity</supertypeName>
+    </typeDescription>
+    <typeDescription>
+      <name>org.apache.ctakes.deid.type.Name</name>
+      <description/>
+      <supertypeName>org.apache.ctakes.deid.type.DeidEntity</supertypeName>
+    </typeDescription>
+    <typeDescription>
+      <name>org.apache.ctakes.deid.type.Contact</name>
+      <description/>
+      <supertypeName>org.apache.ctakes.deid.type.DeidEntity</supertypeName>
+    </typeDescription>
+    <typeDescription>
+      <name>org.apache.ctakes.deid.type.Profession</name>
+      <description/>
+      <supertypeName>org.apache.ctakes.deid.type.DeidEntity</supertypeName>
+    </typeDescription>
+    <typeDescription>
+      <name>org.apache.ctakes.deid.type.DeidEntity</name>
+      <description/>
+      <supertypeName>uima.tcas.Annotation</supertypeName>
+      <features>
+        <featureDescription>
+          <name>id</name>
+          <description/>
+          <rangeTypeName>uima.cas.String</rangeTypeName>
+        </featureDescription>
+        <featureDescription>
+          <name>entityType</name>
+          <description/>
+          <rangeTypeName>uima.cas.String</rangeTypeName>
+        </featureDescription>
+        <featureDescription>
+          <name>comment</name>
+          <description/>
+          <rangeTypeName>uima.cas.String</rangeTypeName>
+        </featureDescription>
+      </features>
+    </typeDescription>
+  </types>
+</typeSystemDescription>

Added: ctakes/sandbox/ctakes-clinical-deid/src/main/resources/template/BasicEngine.xml
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/src/main/resources/template/BasicEngine.xml?rev=1734582&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/src/main/resources/template/BasicEngine.xml (added)
+++ ctakes/sandbox/ctakes-clinical-deid/src/main/resources/template/BasicEngine.xml Fri Mar 11 15:50:47 2016
@@ -0,0 +1,297 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+  
+  http://www.apache.org/licenses/LICENSE-2.0
+  
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
+  <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+  <primitive>true</primitive>
+  <annotatorImplementationName>org.apache.uima.ruta.engine.RutaEngine</annotatorImplementationName>
+  <analysisEngineMetaData>
+    <name>org.apache.uima.ruta.engine.BasicEngine</name>
+    <description/>
+    <version>1.0</version>
+    <vendor/>
+    <configurationParameters searchStrategy="language_fallback">
+      <configurationParameter>
+        <name>seeders</name>
+        <type>String</type>
+        <multiValued>true</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>debug</name>
+        <type>Boolean</type>
+        <multiValued>false</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>additionalScripts</name>
+        <type>String</type>
+        <multiValued>true</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>profile</name>
+        <type>Boolean</type>
+        <multiValued>false</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>debugWithMatches</name>
+        <type>Boolean</type>
+        <multiValued>false</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>statistics</name>
+        <type>Boolean</type>
+        <multiValued>false</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>additionalEngines</name>
+        <type>String</type>
+        <multiValued>true</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>additionalExtensions</name>
+        <type>String</type>
+        <multiValued>true</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>debugOnlyFor</name>
+        <type>String</type>
+        <multiValued>true</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>scriptEncoding</name>
+        <type>String</type>
+        <multiValued>false</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>additionalEngineLoaders</name>
+        <type>String</type>
+        <multiValued>true</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>resourcePaths</name>
+        <type>String</type>
+        <multiValued>true</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>defaultFilteredTypes</name>
+        <type>String</type>
+        <multiValued>true</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>mainScript</name>
+        <type>String</type>
+        <multiValued>false</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>scriptPaths</name>
+        <type>String</type>
+        <multiValued>true</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>descriptorPaths</name>
+        <type>String</type>
+        <multiValued>true</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>removeBasics</name>
+        <type>Boolean</type>
+        <multiValued>false</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>dynamicAnchoring</name>
+        <description>Activates dynamic anchoring (possible speed up).</description>
+        <type>Boolean</type>
+        <multiValued>false</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>greedyRuleElement</name>
+        <description>Activates greedy anchoring for rule elements.</description>
+        <type>Boolean</type>
+        <multiValued>false</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>greedyRule</name>
+        <description>Activates greedy anchoring for complete rules.</description>
+        <type>Boolean</type>
+        <multiValued>false</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>lowMemoryProfile</name>
+        <type>Boolean</type>
+        <multiValued>false</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>createdBy</name>
+        <type>Boolean</type>
+        <multiValued>false</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>simpleGreedyForComposed</name>
+        <type>Boolean</type>
+        <multiValued>false</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>additionalUimafitEngines</name>
+        <type>String</type>
+        <multiValued>true</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>strictImports</name>
+        <type>Boolean</type>
+        <multiValued>false</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>varNames</name>
+        <type>String</type>
+        <multiValued>true</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>varValues</name>
+        <type>String</type>
+        <multiValued>true</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>rules</name>
+        <type>String</type>
+        <multiValued>false</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+    <configurationParameter>
+        <name>dictRemoveWS</name>
+        <type>Boolean</type>
+        <multiValued>false</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+    <configurationParameter>
+        <name>reindexOnly</name>
+        <type>String</type>
+        <multiValued>true</multiValued>
+        <mandatory>false</mandatory>
+    </configurationParameter>
+    </configurationParameters>
+    <configurationParameterSettings>
+      <nameValuePair>
+        <name>debug</name>
+        <value>
+          <boolean>false</boolean>
+        </value>
+      </nameValuePair>
+      <nameValuePair>
+        <name>profile</name>
+        <value>
+          <boolean>false</boolean>
+        </value>
+      </nameValuePair>
+      <nameValuePair>
+        <name>debugWithMatches</name>
+        <value>
+          <boolean>true</boolean>
+        </value>
+      </nameValuePair>
+      <nameValuePair>
+        <name>defaultFilteredTypes</name>
+        <value>
+          <array>
+            <string>org.apache.uima.ruta.type.SPACE</string>
+            <string>org.apache.uima.ruta.type.BREAK</string>
+            <string>org.apache.uima.ruta.type.MARKUP</string>
+          </array>
+        </value>
+      </nameValuePair>
+      <nameValuePair>
+        <name>removeBasics</name>
+        <value>
+          <boolean>false</boolean>
+        </value>
+      </nameValuePair>
+      <nameValuePair>
+        <name>seeders</name>
+        <value>
+          <array>
+            <string>org.apache.uima.ruta.seed.DefaultSeeder</string>
+          </array>
+        </value>
+      </nameValuePair>
+      <nameValuePair>
+        <name>createdBy</name>
+        <value>
+          <boolean>false</boolean>
+        </value>
+      </nameValuePair>
+    <nameValuePair>
+        <name>strictImports</name>
+        <value>
+          <boolean>true</boolean>
+        </value>
+      </nameValuePair>
+    </configurationParameterSettings>
+    <typeSystemDescription>
+      <imports>
+        <import name="org.apache.uima.ruta.engine.BasicTypeSystem"/>
+      </imports>
+    </typeSystemDescription>
+    <typePriorities>
+      <priorityList>
+        <type>org.apache.uima.ruta.type.RutaFrame</type>
+        <type>uima.tcas.Annotation</type>
+        <type>org.apache.uima.ruta.type.RutaBasic</type>
+      </priorityList>
+    </typePriorities>
+    <fsIndexCollection/>
+    <capabilities>
+      <capability>
+        <inputs/>
+        <outputs/>
+        <languagesSupported/>
+      </capability>
+    </capabilities>
+    <operationalProperties>
+      <modifiesCas>true</modifiesCas>
+      <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
+      <outputsNewCASes>true</outputsNewCASes>
+    </operationalProperties>
+  </analysisEngineMetaData>
+  <resourceManagerConfiguration/>
+</analysisEngineDescription>

Added: ctakes/sandbox/ctakes-clinical-deid/src/main/resources/wordlists/age_post_ind.txt
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/src/main/resources/wordlists/age_post_ind.txt?rev=1734582&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/src/main/resources/wordlists/age_post_ind.txt (added)
+++ ctakes/sandbox/ctakes-clinical-deid/src/main/resources/wordlists/age_post_ind.txt Fri Mar 11 15:50:47 2016
@@ -0,0 +1,19 @@
+years old
+y.o
+/
+m
+f
+yo
+yoRHM
+yr
+yrs
+yF
+yM
+yoF
+yoM
+year-old
+yr-old
+yrs-old
+yearold
+yrold
+yrsold
\ No newline at end of file

Added: ctakes/sandbox/ctakes-clinical-deid/src/main/resources/wordlists/age_pre_ind.txt
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/src/main/resources/wordlists/age_pre_ind.txt?rev=1734582&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/src/main/resources/wordlists/age_pre_ind.txt (added)
+++ ctakes/sandbox/ctakes-clinical-deid/src/main/resources/wordlists/age_pre_ind.txt Fri Mar 11 15:50:47 2016
@@ -0,0 +1,3 @@
+age
+ages
+aged
\ No newline at end of file

Added: ctakes/sandbox/ctakes-clinical-deid/src/main/resources/wordlists/deceased_ind.txt
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/src/main/resources/wordlists/deceased_ind.txt?rev=1734582&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/src/main/resources/wordlists/deceased_ind.txt (added)
+++ ctakes/sandbox/ctakes-clinical-deid/src/main/resources/wordlists/deceased_ind.txt Fri Mar 11 15:50:47 2016
@@ -0,0 +1,3 @@
+passed away
+dies
+deceased
\ No newline at end of file

Added: ctakes/sandbox/ctakes-clinical-deid/src/main/resources/wordlists/family_ind.txt
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/src/main/resources/wordlists/family_ind.txt?rev=1734582&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/src/main/resources/wordlists/family_ind.txt (added)
+++ ctakes/sandbox/ctakes-clinical-deid/src/main/resources/wordlists/family_ind.txt Fri Mar 11 15:50:47 2016
@@ -0,0 +1,6 @@
+brother
+sister
+grandmother
+grandfather
+father
+mother
\ No newline at end of file

Added: ctakes/sandbox/ctakes-clinical-deid/src/main/resources/wordlists/month_ind.txt
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/src/main/resources/wordlists/month_ind.txt?rev=1734582&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/src/main/resources/wordlists/month_ind.txt (added)
+++ ctakes/sandbox/ctakes-clinical-deid/src/main/resources/wordlists/month_ind.txt Fri Mar 11 15:50:47 2016
@@ -0,0 +1,12 @@
+january
+february
+march
+april
+may
+june
+july
+august
+september
+october
+november
+december
\ No newline at end of file

Added: ctakes/sandbox/ctakes-clinical-deid/src/main/resources/wordlists/phone_pre_ind.txt
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/src/main/resources/wordlists/phone_pre_ind.txt?rev=1734582&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/src/main/resources/wordlists/phone_pre_ind.txt (added)
+++ ctakes/sandbox/ctakes-clinical-deid/src/main/resources/wordlists/phone_pre_ind.txt Fri Mar 11 15:50:47 2016
@@ -0,0 +1,5 @@
+tele
+telephone
+tel
+phone
+contact
\ No newline at end of file

Added: ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Age.ruta
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Age.ruta?rev=1734582&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Age.ruta (added)
+++ ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Age.ruta Fri Mar 11 15:50:47 2016
@@ -0,0 +1,24 @@
+PACKAGE org.apache.ctakes.deid;
+TYPESYSTEM org.apache.ctakes.deid.DictionariesRutaTypeSystem;
+TYPESYSTEM org.apache.ctakes.deid.types.TypeSystem;
+
+// Context rules that annotate numbers as Age; rules carrying -PARTOF(Age) skip numbers already inside an Age.
+(Num12 ApoInd?){-> Age} SPECIAL.ct=="-"? AgePostInd;
+AgePreInd (Num12 ApoInd?){-PARTOF(Age)-> Age}; 
+DeceasedInd W{REGEXP("from|with|of", true)} W{REGEXP("at", true)} 
+    (Num12 ApoInd?){-PARTOF(Age)-> Age};
+Num2{-PARTOF(Age)-> Age} W{REGEXP("M|F|male|female", true)} W{OR(REGEXP("with|/|h|hx|s|w|p|who|comes|admitted", true), IS(CAP))};
+((W{REGEXP("in|by", true)} W{REGEXP("his|her", true)} W??)
+   | (W{REGEXP("lived", true)} W{REGEXP("into|to", true)} W?{REGEXP("his|her|their", true)})
+   | (W{REGEXP("who|she|he", true)} W{REGEXP("is", true)} W{REGEXP("now", true)})) // "she" replaces the typo "shoe"
+    (@Num2 ApoInd){-PARTOF(Age)-> Age};
+FamilyInd W.ct=="at"? Num2{-PARTOF(Age)-> Age};
+// Number after "MI"/"cancer", a capitalized word of 1-3 characters, or "myocardial infarction", with optional "at".
+((W{OR(REGEXP("MI|cancer"), AND(IS(CW), REGEXP(".{1,3}")))})
+    | (W{REGEXP("myocardial", true)} W{REGEXP("infarction", true)}))
+    W.ct=="at"? 
+    @Num2{-PARTOF(Age) -> Age};
+// Number before "st|th" + "birthday", and numbers joined by ",", "/" or "and" after AgePreInd or before "years of age".
+Num2{-PARTOF(Age)-> Age} W{REGEXP("st|th")} W{REGEXP("[Bb]irthday")};
+AgePreInd Num12{-PARTOF(Age)-> Age} (ANY{REGEXP(",|/|and")}  Num12{-> Age})*;
+Num12{-PARTOF(Age)-> Age} ANY{REGEXP(",|/|and")} Num12{-PARTOF(Age)-> Age} "years" "of" "age";

Added: ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Date.ruta
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Date.ruta?rev=1734582&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Date.ruta (added)
+++ ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Date.ruta Fri Mar 11 15:50:47 2016
@@ -0,0 +1,11 @@
+PACKAGE org.apache.ctakes.deid;
+
+TYPESYSTEM org.apache.ctakes.deid.DictionariesRutaTypeSystem;
+TYPESYSTEM org.apache.ctakes.deid.types.TypeSystem;
+// Date rules. Whitespace is retained while they run, so the elements of one match must be directly adjacent.
+RETAINTYPE(WS);
+(Num4{-PARTOF(Date),REGEXP("19..|20..")} Dash Num2 Dash Num2){-> Date}; // 19xx/20xx, dash, Num2, dash, Num2
+(Num12{-PARTOF(Date)} Slash (Num12 Slash)? Num2{REGEXP("[123456789].")}){-> Date}; // slash-separated; last part must not start with 0
+Num4{-PARTOF(Date),REGEXP("19..|20..")-> Date}; // stand-alone 19xx/20xx not yet inside a Date
+MonthInd{-PARTOF(Date)-> Date}; // presumably a month_ind.txt wordlist hit - TODO confirm where MonthInd is created
+RETAINTYPE;

Added: ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Doctor.ruta
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Doctor.ruta?rev=1734582&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Doctor.ruta (added)
+++ ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Doctor.ruta Fri Mar 11 15:50:47 2016
@@ -0,0 +1,9 @@
+PACKAGE org.apache.ctakes.deid;
+
+TYPESYSTEM org.apache.ctakes.deid.types.TypeSystem;
+TYPESYSTEM org.apache.ctakes.deid.UserNameRutaTypeSystem;
+TYPESYSTEM org.apache.ctakes.deid.DictionariesRutaTypeSystem;
+
+DECLARE Doctor;
+// The word after an MDInd is marked Doctor unless it is part of a UserName; so is each further word joined by "/".
+MDInd W{-PARTOF(UserName)-> Doctor} (SPECIAL.ct=="/" W{-> Doctor})*;
\ No newline at end of file

Added: ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Phone.ruta
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Phone.ruta?rev=1734582&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Phone.ruta (added)
+++ ctakes/sandbox/ctakes-clinical-deid/src/main/ruta/org/apache/ctakes/deid/Phone.ruta Fri Mar 11 15:50:47 2016
@@ -0,0 +1,15 @@
+PACKAGE org.apache.ctakes.deid;
+
+TYPESYSTEM org.apache.ctakes.deid.DictionariesRutaTypeSystem;
+
+DECLARE Phone;
+// Helper type so that a single rule element can match either separator.
+DECLARE DashOrPeriod;
+PERIOD{-> DashOrPeriod};
+Dash{-> DashOrPeriod};
+// After a PhonePreInd: optional "(Num3)", optional dash, then Num34 groups joined by dash/period are marked Phone.
+PhonePreInd W?{OR(PARTOF({PERIOD, COLON}), W.ct=="#")} // NOTE(review): can a W ever be a period, colon or "#"? confirm intent
+    (
+    (LParen Num3 RParen)? Dash?
+    Num34 (DashOrPeriod Num34)+
+    ){-> Phone};
\ No newline at end of file

Added: ctakes/sandbox/ctakes-clinical-deid/src/main/xsd/i2b2.xsd
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/src/main/xsd/i2b2.xsd?rev=1734582&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/src/main/xsd/i2b2.xsd (added)
+++ ctakes/sandbox/ctakes-clinical-deid/src/main/xsd/i2b2.xsd Fri Mar 11 15:50:47 2016
@@ -0,0 +1,99 @@
+<xs:schema attributeFormDefault="unqualified" elementFormDefault="qualified" xmlns:xs="http://www.w3.org/2001/XMLSchema">
+  <xs:element name="deIdi2b2">
+    <xs:complexType>
+      <xs:sequence>
+        <xs:element type="xs:string" name="TEXT"/>
+        <xs:element name="TAGS">
+          <xs:complexType>
+            <xs:choice maxOccurs="unbounded" minOccurs="0">
+              <xs:element name="DATE">
+                <xs:complexType>
+                  <xs:simpleContent>
+                    <xs:extension base="xs:string">
+                      <xs:attribute type="xs:string" name="id"/>
+                      <xs:attribute type="xs:integer" name="start"/>
+                      <xs:attribute type="xs:integer" name="end"/>
+                      <xs:attribute type="xs:string" name="text"/>
+                      <xs:attribute type="xs:string" name="TYPE"/>
+                      <xs:attribute type="xs:string" name="comment"/>
+                    </xs:extension>
+                  </xs:simpleContent>
+                </xs:complexType>
+              </xs:element>
+              <xs:element name="AGE">
+                <xs:complexType>
+                  <xs:simpleContent>
+                    <xs:extension base="xs:string">
+                      <xs:attribute type="xs:string" name="id"/>
+                      <xs:attribute type="xs:integer" name="start"/>
+                      <xs:attribute type="xs:integer" name="end"/>
+                      <xs:attribute type="xs:string" name="text"/>
+                      <xs:attribute type="xs:string" name="TYPE"/>
+                      <xs:attribute type="xs:string" name="comment"/>
+                    </xs:extension>
+                  </xs:simpleContent>
+                </xs:complexType>
+              </xs:element>
+              <xs:element name="NAME">
+                <xs:complexType>
+                  <xs:simpleContent>
+                    <xs:extension base="xs:string">
+                      <xs:attribute type="xs:string" name="id" use="optional"/>
+                      <xs:attribute type="xs:integer" name="start" use="optional"/>
+                      <xs:attribute type="xs:integer" name="end" use="optional"/>
+                      <xs:attribute type="xs:string" name="text" use="optional"/>
+                      <xs:attribute type="xs:string" name="TYPE" use="optional"/>
+                      <xs:attribute type="xs:string" name="comment" use="optional"/>
+                    </xs:extension>
+                  </xs:simpleContent>
+                </xs:complexType>
+              </xs:element>
+              <xs:element name="LOCATION">
+                <xs:complexType>
+                  <xs:simpleContent>
+                    <xs:extension base="xs:string">
+                      <xs:attribute type="xs:string" name="id" use="optional"/>
+                      <xs:attribute type="xs:integer" name="start" use="optional"/>
+                      <xs:attribute type="xs:integer" name="end" use="optional"/>
+                      <xs:attribute type="xs:string" name="text" use="optional"/>
+                      <xs:attribute type="xs:string" name="TYPE" use="optional"/>
+                      <xs:attribute type="xs:string" name="comment" use="optional"/>
+                    </xs:extension>
+                  </xs:simpleContent>
+                </xs:complexType>
+              </xs:element>
+              <xs:element name="CONTACT">
+                <xs:complexType>
+                  <xs:simpleContent>
+                    <xs:extension base="xs:string">
+                      <xs:attribute type="xs:string" name="id" use="optional"/>
+                      <xs:attribute type="xs:integer" name="start" use="optional"/>
+                      <xs:attribute type="xs:integer" name="end" use="optional"/>
+                      <xs:attribute type="xs:string" name="text" use="optional"/>
+                      <xs:attribute type="xs:string" name="TYPE" use="optional"/>
+                      <xs:attribute type="xs:string" name="comment" use="optional"/>
+                    </xs:extension>
+                  </xs:simpleContent>
+                </xs:complexType>
+              </xs:element>
+              <xs:element name="PROFESSION">
+                <xs:complexType>
+                  <xs:simpleContent>
+                    <xs:extension base="xs:string">
+                      <xs:attribute type="xs:string" name="id" use="optional"/>
+                      <xs:attribute type="xs:integer" name="start" use="optional"/>
+                      <xs:attribute type="xs:integer" name="end" use="optional"/>
+                      <xs:attribute type="xs:string" name="text" use="optional"/>
+                      <xs:attribute type="xs:string" name="TYPE" use="optional"/>
+                      <xs:attribute type="xs:string" name="comment" use="optional"/>
+                    </xs:extension>
+                  </xs:simpleContent>
+                </xs:complexType>
+              </xs:element>
+            </xs:choice>
+          </xs:complexType>
+        </xs:element>
+      </xs:sequence>
+    </xs:complexType>
+  </xs:element>
+</xs:schema>
\ No newline at end of file

Added: ctakes/sandbox/ctakes-clinical-deid/src/test/java/org/apache/ctakes/deid/I2B2Evaluation.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/src/test/java/org/apache/ctakes/deid/I2B2Evaluation.java?rev=1734582&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/src/test/java/org/apache/ctakes/deid/I2B2Evaluation.java (added)
+++ ctakes/sandbox/ctakes-clinical-deid/src/test/java/org/apache/ctakes/deid/I2B2Evaluation.java Fri Mar 11 15:50:47 2016
@@ -0,0 +1,50 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.ctakes.deid;
+
+import java.io.IOException;
+
+import org.apache.ctakes.core.cc.XmiWriterCasConsumerCtakes;
+import org.apache.uima.UIMAException;
+import org.apache.uima.fit.factory.AnalysisEngineFactory;
+import org.apache.uima.fit.factory.CollectionReaderFactory;
+import org.apache.uima.fit.pipeline.SimplePipeline;
+import org.apache.uima.resource.ResourceInitializationException;
+
+/** Reads an i2b2 corpus, runs DeidRutaAnnotator and SimpleDeidEntityComparator, and writes XMI files. */
+public class I2B2Evaluation {
+  /** @param args optional overrides: args[0] = i2b2 input directory, args[1] = XMI output directory */
+  public static void main(String[] args)
+          throws ResourceInitializationException, UIMAException, IOException {
+    // Defaults keep the previously hard-coded locations, so running without arguments is unchanged.
+    String inputDir = args.length > 0 ? args[0] : "C:/data/i2b2/2014/PHI-test/";
+    String outputDir = args.length > 1 ? args[1] : "target/xmis";
+
+    SimplePipeline.runPipeline(
+            CollectionReaderFactory.createReader(I2B2DeidCollectionReader.class,
+                    I2B2DeidCollectionReader.PARAM_INPUT_DIRECTORY, inputDir,
+                    I2B2DeidCollectionReader.PARAM_GOLD_VIEW, "gold"),
+            AnalysisEngineFactory.createEngine("org.apache.ctakes.deid.DeidRutaAnnotator"),
+            AnalysisEngineFactory.createEngine(SimpleDeidEntityComparator.class,
+                    SimpleDeidEntityComparator.PARAM_CREATE_RUTA_EVAL_ANNOTATIONS, true),
+            AnalysisEngineFactory.createEngine(XmiWriterCasConsumerCtakes.class,
+                    XmiWriterCasConsumerCtakes.PARAM_OUTPUTDIR, outputDir));
+  }
+}




Mime
View raw message