ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From chen...@apache.org
Subject svn commit: r1734596 - in /ctakes/sandbox/ctakes-clinical-deid/src/main/java/org/apache/ctakes/deid: I2B2DeidCollectionReader.java SimpleDeidEntityComparator.java
Date Fri, 11 Mar 2016 17:26:57 GMT
Author: chenpei
Date: Fri Mar 11 17:26:57 2016
New Revision: 1734596

URL: http://svn.apache.org/viewvc?rev=1734596&view=rev
Log:
CTAKES-384 Applying patch 20160311-2.patch. Thanks Peter Klugl.

Modified:
    ctakes/sandbox/ctakes-clinical-deid/src/main/java/org/apache/ctakes/deid/I2B2DeidCollectionReader.java
    ctakes/sandbox/ctakes-clinical-deid/src/main/java/org/apache/ctakes/deid/SimpleDeidEntityComparator.java

Modified: ctakes/sandbox/ctakes-clinical-deid/src/main/java/org/apache/ctakes/deid/I2B2DeidCollectionReader.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/src/main/java/org/apache/ctakes/deid/I2B2DeidCollectionReader.java?rev=1734596&r1=1734595&r2=1734596&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/src/main/java/org/apache/ctakes/deid/I2B2DeidCollectionReader.java
(original)
+++ ctakes/sandbox/ctakes-clinical-deid/src/main/java/org/apache/ctakes/deid/I2B2DeidCollectionReader.java
Fri Mar 11 17:26:57 2016
@@ -181,3 +181,186 @@ public class I2B2DeidCollectionReader ex
   }
 
 }
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.ctakes.deid;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Collection;
+import java.util.Iterator;
+import java.util.List;
+
+import javax.xml.bind.JAXBContext;
+import javax.xml.bind.Unmarshaller;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.ctakes.deid.i2b2.DeIdi2B2;
+import org.apache.ctakes.deid.i2b2.DeIdi2B2.TAGS;
+import org.apache.ctakes.deid.i2b2.DeIdi2B2.TAGS.AGE;
+import org.apache.ctakes.deid.i2b2.DeIdi2B2.TAGS.CONTACT;
+import org.apache.ctakes.deid.i2b2.DeIdi2B2.TAGS.DATE;
+import org.apache.ctakes.deid.i2b2.DeIdi2B2.TAGS.LOCATION;
+import org.apache.ctakes.deid.i2b2.DeIdi2B2.TAGS.NAME;
+import org.apache.ctakes.deid.i2b2.DeIdi2B2.TAGS.PROFESSION;
+import org.apache.ctakes.deid.type.Age;
+import org.apache.ctakes.deid.type.Date;
+import org.apache.ctakes.deid.type.DeidEntity;
+import org.apache.ctakes.deid.type.Location;
+import org.apache.ctakes.deid.type.Name;
+import org.apache.ctakes.deid.type.Profession;
+import org.apache.ctakes.typesystem.type.structured.DocumentID;
+import org.apache.uima.UimaContext;
+import org.apache.uima.cas.CASException;
+import org.apache.uima.collection.CollectionException;
+import org.apache.uima.fit.component.JCasCollectionReader_ImplBase;
+import org.apache.uima.fit.descriptor.ConfigurationParameter;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.util.Progress;
+import org.apache.uima.util.ProgressImpl;
+
+/**
+ * Collection reader that turns each i2b2 de-identification XML file found below
+ * {@link #PARAM_INPUT_DIRECTORY} into one CAS.
+ * <p>
+ * Every file is unmarshalled with JAXB into a {@link DeIdi2B2} document. Its text becomes
+ * the document text, a {@link DocumentID} carrying the file name is added, and every
+ * supported tag element is converted into a {@link DeidEntity} annotation. The annotations
+ * are written to the view named by {@link #PARAM_GOLD_VIEW}; if that name differs from the
+ * name of the view handed to {@link #getNext(JCas)}, an additional view is created for them.
+ */
+public class I2B2DeidCollectionReader extends JCasCollectionReader_ImplBase {
+
+  public static final String PARAM_INPUT_DIRECTORY = "inputDirectory";
+
+  /** Root directory that is searched (recursively) for input files. */
+  @ConfigurationParameter(name = PARAM_INPUT_DIRECTORY, mandatory = true)
+  private File inputDirectory;
+
+  public static final String PARAM_FILE_EXTENSIONS = "fileExtensions";
+
+  /** File extensions (without dot) of the files to read. */
+  @ConfigurationParameter(name = PARAM_FILE_EXTENSIONS, mandatory = true, defaultValue = "xml")
+  private String[] fileExtensions;
+
+  public static final String PARAM_GOLD_VIEW = "goldView";
+
+  /** Name of the view that receives the annotations read from the XML tags. */
+  @ConfigurationParameter(name = PARAM_GOLD_VIEW, mandatory = true, defaultValue = "_InitialView")
+  private String goldView;
+
+  /** Iterates over the files collected in {@link #initialize(UimaContext)}. */
+  private Iterator<File> iterator;
+
+  /** Number of files handed out so far. */
+  private int current = 0;
+
+  /** Total number of files found during initialization. */
+  private int overall = 0;
+
+  /** JAXB context for {@link DeIdi2B2}; built once because creating it is costly. */
+  private JAXBContext jaxbContext;
+
+  /**
+   * Collects the input files and prepares the JAXB context.
+   *
+   * @throws ResourceInitializationException
+   *           if the super class fails to initialize or the JAXB context cannot be created
+   */
+  @Override
+  public void initialize(UimaContext context) throws ResourceInitializationException {
+    super.initialize(context);
+    try {
+      jaxbContext = JAXBContext.newInstance(DeIdi2B2.class);
+    } catch (javax.xml.bind.JAXBException e) {
+      throw new ResourceInitializationException(e);
+    }
+    Collection<File> files = FileUtils.listFiles(inputDirectory, fileExtensions, true);
+    overall = files.size();
+    iterator = files.iterator();
+  }
+
+  /**
+   * Fills the given CAS with the content of the next input file.
+   *
+   * @param jcas
+   *          the CAS to populate
+   * @throws CollectionException
+   *           if the file cannot be unmarshalled or the gold view cannot be created
+   */
+  @Override
+  public void getNext(JCas jcas) throws IOException, CollectionException {
+    File currentFile = iterator.next();
+    current++;
+
+    DeIdi2B2 doc;
+    try {
+      // Unmarshaller instances are cheap; only the context is shared between calls.
+      Unmarshaller jaxbUnmarshaller = jaxbContext.createUnmarshaller();
+      doc = (DeIdi2B2) jaxbUnmarshaller.unmarshal(currentFile);
+    } catch (Exception e) {
+      throw new CollectionException(e);
+    }
+
+    jcas.setDocumentText(doc.getTEXT());
+    addDocumentId(jcas, currentFile);
+
+    JCas targetView = jcas;
+    if (!StringUtils.equals(goldView, jcas.getViewName())) {
+      // create additional gold view
+      try {
+        targetView = jcas.createView(goldView);
+        targetView.setDocumentText(doc.getTEXT());
+        addDocumentId(targetView, currentFile);
+      } catch (CASException e) {
+        throw new CollectionException(e);
+      }
+    }
+
+    TAGS tags = doc.getTAGS();
+    if (tags == null) {
+      // Document without a TAGS element: nothing to annotate.
+      return;
+    }
+    List<Object> tagElements = tags.getDATEOrAGEOrNAME();
+    for (Object object : tagElements) {
+      createDeidEntity(targetView, object);
+    }
+  }
+
+  /** Adds a {@link DocumentID} holding the file name to the indexes of the given view. */
+  private void addDocumentId(JCas jcas, File currentFile) {
+    DocumentID docId = new DocumentID(jcas);
+    docId.setDocumentID(currentFile.getName());
+    docId.addToIndexes();
+  }
+
+  @Override
+  public boolean hasNext() throws IOException, CollectionException {
+    return iterator.hasNext();
+  }
+
+  /** Reports progress as the number of files read out of the number of files found. */
+  @Override
+  public Progress[] getProgress() {
+    return new Progress[] { new ProgressImpl(current, overall, Progress.ENTITIES) };
+  }
+
+  /**
+   * Converts one tag element into the matching {@link DeidEntity} subtype and adds it to the
+   * indexes of the given view. Elements of any other class are skipped.
+   *
+   * @param jcas
+   *          the view that receives the annotation
+   * @param object
+   *          one element of the tag list of the unmarshalled document
+   */
+  private void createDeidEntity(JCas jcas, Object object) {
+    DeidEntity entity = null;
+    if (object instanceof AGE) {
+      AGE element = (AGE) object;
+      entity = new Age(jcas, element.getStart().intValue(), element.getEnd().intValue());
+      entity.setId(element.getId());
+      entity.setEntityType(element.getTYPE());
+      entity.setComment(element.getComment());
+    } else if (object instanceof DATE) {
+      DATE element = (DATE) object;
+      entity = new Date(jcas, element.getStart().intValue(), element.getEnd().intValue());
+      entity.setId(element.getId());
+      entity.setEntityType(element.getTYPE());
+      entity.setComment(element.getComment());
+    } else if (object instanceof LOCATION) {
+      LOCATION element = (LOCATION) object;
+      entity = new Location(jcas, element.getStart().intValue(), element.getEnd().intValue());
+      entity.setId(element.getId());
+      entity.setEntityType(element.getTYPE());
+      entity.setComment(element.getComment());
+    } else if (object instanceof NAME) {
+      NAME element = (NAME) object;
+      entity = new Name(jcas, element.getStart().intValue(), element.getEnd().intValue());
+      entity.setId(element.getId());
+      entity.setEntityType(element.getTYPE());
+      entity.setComment(element.getComment());
+    } else if (object instanceof CONTACT) {
+      CONTACT element = (CONTACT) object;
+      // NOTE(review): CONTACT tags are indexed as Name annotations. This looks like a
+      // copy-paste leftover; confirm whether a dedicated contact type should be used.
+      entity = new Name(jcas, element.getStart().intValue(), element.getEnd().intValue());
+      entity.setId(element.getId());
+      entity.setEntityType(element.getTYPE());
+      entity.setComment(element.getComment());
+    } else if (object instanceof PROFESSION) {
+      PROFESSION element = (PROFESSION) object;
+      entity = new Profession(jcas, element.getStart().intValue(), element.getEnd().intValue());
+      entity.setId(element.getId());
+      entity.setEntityType(element.getTYPE());
+      entity.setComment(element.getComment());
+    }
+    if (entity == null) {
+      // Unsupported tag class: skip it instead of failing with a NullPointerException.
+      return;
+    }
+    entity.addToIndexes();
+  }
+
+}

Modified: ctakes/sandbox/ctakes-clinical-deid/src/main/java/org/apache/ctakes/deid/SimpleDeidEntityComparator.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/src/main/java/org/apache/ctakes/deid/SimpleDeidEntityComparator.java?rev=1734596&r1=1734595&r2=1734596&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/src/main/java/org/apache/ctakes/deid/SimpleDeidEntityComparator.java
(original)
+++ ctakes/sandbox/ctakes-clinical-deid/src/main/java/org/apache/ctakes/deid/SimpleDeidEntityComparator.java
Fri Mar 11 17:26:57 2016
@@ -195,3 +195,200 @@ public class SimpleDeidEntityComparator
   }
   
 }
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.ctakes.deid;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Locale;
+import java.util.Map;
+import java.util.TreeMap;
+import java.util.TreeSet;
+
+import org.apache.ctakes.core.util.DocumentIDAnnotationUtil;
+import org.apache.ctakes.deid.type.DeidEntity;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.CASException;
+import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
+import org.apache.uima.fit.descriptor.ConfigurationParameter;
+import org.apache.uima.fit.util.JCasUtil;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.ruta.type.FalseNegative;
+import org.apache.uima.ruta.type.FalsePositive;
+import org.apache.uima.ruta.type.TruePositive;
+import org.apache.uima.util.CasCopier;
+
+/**
+ * Annotator that compares the {@link DeidEntity} annotations of the processed view with
+ * those of a gold view and prints precision, recall and F1 to standard output: once per
+ * document in {@link #process(JCas)} and once per annotation type plus overall in
+ * {@link #collectionProcessComplete()}.
+ * <p>
+ * Two annotations match when their UIMA type name, begin, end and entity type are equal.
+ * Optionally, Ruta {@link TruePositive}, {@link FalsePositive} and {@link FalseNegative}
+ * annotations are added to the processed view.
+ */
+public class SimpleDeidEntityComparator extends JCasAnnotator_ImplBase {
+
+  public static final String PARAM_GOLD_VIEW = "goldView";
+
+  /** Name of the view that holds the gold annotations. */
+  @ConfigurationParameter(name = PARAM_GOLD_VIEW, mandatory = true, defaultValue = "gold")
+  private String goldView;
+
+  public static final String PARAM_CREATE_RUTA_EVAL_ANNOTATIONS = "createRutaEvalAnnotations";
+
+  /** Whether Ruta evaluation annotations are created for the comparison result. */
+  @ConfigurationParameter(name = PARAM_CREATE_RUTA_EVAL_ANNOTATIONS, mandatory = true, defaultValue = "false")
+  private Boolean createRutaEvalAnnotations;
+
+  /** True positive counts across all processed documents, keyed by short type name. */
+  private final Map<String, Integer> type2tp = new TreeMap<>();
+
+  /** False positive counts across all processed documents, keyed by short type name. */
+  private final Map<String, Integer> type2fp = new TreeMap<>();
+
+  /** False negative counts across all processed documents, keyed by short type name. */
+  private final Map<String, Integer> type2fn = new TreeMap<>();
+
+  /**
+   * Compares the annotations of the given view with those of the gold view, prints the
+   * scores for this document and updates the per-type counters.
+   *
+   * @throws AnalysisEngineProcessException
+   *           if the gold view cannot be obtained
+   */
+  @Override
+  public void process(JCas jcas) throws AnalysisEngineProcessException {
+
+    String documentID = DocumentIDAnnotationUtil.getDocumentID(jcas);
+
+    JCas processJCas = jcas;
+    JCas goldJCas;
+    try {
+      goldJCas = jcas.getView(goldView);
+    } catch (CASException e) {
+      throw new AnalysisEngineProcessException(e);
+    }
+
+    Collection<DeidEntity> allGold = JCasUtil.select(goldJCas, DeidEntity.class);
+    Collection<DeidEntity> allProcess = JCasUtil.select(processJCas, DeidEntity.class);
+
+    Collection<DeidEntity> tp = new ArrayList<>();
+    Collection<DeidEntity> fp = new ArrayList<>();
+    Collection<DeidEntity> fn = new ArrayList<>();
+
+    // Missed gold annotations are copied so that they belong to the processed CAS.
+    CasCopier cc = new CasCopier(goldJCas.getCas(), processJCas.getCas());
+
+    // Pass 1: every gold annotation is either matched (TP) or missed (FN).
+    for (DeidEntity goldAnnotation : allGold) {
+      boolean found = false;
+      for (DeidEntity processAnnotation : allProcess) {
+        if (equals(goldAnnotation, processAnnotation)) {
+          tp.add(processAnnotation);
+          inc(type2tp, processAnnotation);
+          found = true;
+          break;
+        }
+      }
+      if (!found) {
+        DeidEntity copyFs = (DeidEntity) cc.copyFs(goldAnnotation);
+        fn.add(copyFs);
+        inc(type2fn, copyFs);
+      }
+    }
+
+    // Pass 2: every processed annotation without any gold counterpart is an FP.
+    // NOTE(review): a processed annotation that duplicates an already matched one is
+    // counted neither as TP nor as FP.
+    for (DeidEntity processAnnotation : allProcess) {
+      boolean found = false;
+      for (DeidEntity goldAnnotation : allGold) {
+        if (equals(goldAnnotation, processAnnotation)) {
+          found = true;
+          break;
+        }
+      }
+      if (!found) {
+        fp.add(processAnnotation);
+        inc(type2fp, processAnnotation);
+      }
+    }
+
+    printResult(documentID, tp.size(), fp.size(), fn.size());
+
+    if (createRutaEvalAnnotations) {
+      for (DeidEntity each : tp) {
+        TruePositive a = new TruePositive(processJCas, each.getBegin(), each.getEnd());
+        a.setOriginal(each);
+        a.addToIndexes();
+      }
+      for (DeidEntity each : fp) {
+        FalsePositive a = new FalsePositive(processJCas, each.getBegin(), each.getEnd());
+        a.setOriginal(each);
+        a.addToIndexes();
+      }
+      for (DeidEntity each : fn) {
+        FalseNegative a = new FalseNegative(processJCas, each.getBegin(), each.getEnd());
+        a.setOriginal(each);
+        a.addToIndexes();
+      }
+    }
+  }
+
+  /**
+   * Prints the scores per annotation type and over all types, then resets the counters so
+   * that a later collection processed by the same instance starts from zero.
+   */
+  @Override
+  public void collectionProcessComplete() throws AnalysisEngineProcessException {
+    super.collectionProcessComplete();
+    int tps = 0;
+    int fps = 0;
+    int fns = 0;
+    Collection<String> types = new TreeSet<>();
+    types.addAll(type2tp.keySet());
+    types.addAll(type2fp.keySet());
+    types.addAll(type2fn.keySet());
+
+    for (String type : types) {
+      int tp = countOf(type2tp, type);
+      int fp = countOf(type2fp, type);
+      int fn = countOf(type2fn, type);
+      tps += tp;
+      fps += fp;
+      fns += fn;
+      printResult(type, tp, fp, fn);
+    }
+
+    printResult("OVERALL", tps, fps, fns);
+
+    type2tp.clear();
+    type2fp.clear();
+    type2fn.clear();
+  }
+
+  /** Returns the count stored for the given type, or 0 if there is none. */
+  private static int countOf(Map<String, Integer> map, String type) {
+    Integer count = map.get(type);
+    return count == null ? 0 : count;
+  }
+
+  /**
+   * Checks whether a gold and a processed annotation match: same UIMA type name, same
+   * offsets and same entity type. Entity types are compared null-safely; two missing
+   * entity types count as equal.
+   */
+  private static boolean equals(DeidEntity goldAnnotation, DeidEntity processAnnotation) {
+    boolean sameType = goldAnnotation.getType().getName()
+            .equals(processAnnotation.getType().getName());
+    boolean sameBegin = goldAnnotation.getBegin() == processAnnotation.getBegin();
+    boolean sameEnd = goldAnnotation.getEnd() == processAnnotation.getEnd();
+    boolean sameEntityType = java.util.Objects.equals(goldAnnotation.getEntityType(),
+            processAnnotation.getEntityType());
+    return sameType && sameBegin && sameEnd && sameEntityType;
+  }
+
+  /**
+   * Prints one result line with precision, recall and F1 for the given counts.
+   * Precision is 1 when there are no positives at all, recall is 1 when there is nothing
+   * to find, and F1 is 0 when precision and recall are both 0.
+   *
+   * @param doc
+   *          label printed in the first column (document id, type name or "OVERALL")
+   */
+  private static void printResult(String doc, double tpCount, double fpCount, double fnCount) {
+    double precision = 1;
+    if (tpCount + fpCount != 0) {
+      precision = tpCount / (tpCount + fpCount);
+    }
+    double recall = 1;
+    if (tpCount + fnCount != 0) {
+      recall = tpCount / (tpCount + fnCount);
+    }
+    double f1 = 0;
+    if (precision + recall != 0) {
+      f1 = 2 * (precision * recall) / (precision + recall);
+    }
+
+    System.out.printf(Locale.ENGLISH, "%-10s\tp: %.2f\tr: %.2f\tf1: %.2f", doc, precision, recall, f1);
+    System.out.println();
+  }
+
+  /** Increments the counter stored under the short type name of the given annotation. */
+  private void inc(Map<String, Integer> map, DeidEntity annotation) {
+    String name = annotation.getType().getShortName();
+    Integer count = map.get(name);
+    if (count == null) {
+      map.put(name, 1);
+    } else {
+      map.put(name, count + 1);
+    }
+  }
+
+}



Mime
View raw message