ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From pklu...@apache.org
Subject svn commit: r1746271 - in /ctakes/sandbox/ctakes-clinical-deid: pom.xml.orig pom.xml.rej src/main/java/org/apache/ctakes/deid/I2B2DeidCollectionReader.java src/main/java/org/apache/ctakes/deid/SimpleDeidEntityComparator.java
Date Tue, 31 May 2016 13:03:47 GMT
Author: pkluegl
Date: Tue May 31 13:03:46 2016
New Revision: 1746271

URL: http://svn.apache.org/viewvc?rev=1746271&view=rev
Log:
CTAKES-384 fixed last patch

Removed:
    ctakes/sandbox/ctakes-clinical-deid/pom.xml.orig
    ctakes/sandbox/ctakes-clinical-deid/pom.xml.rej
Modified:
    ctakes/sandbox/ctakes-clinical-deid/src/main/java/org/apache/ctakes/deid/I2B2DeidCollectionReader.java
    ctakes/sandbox/ctakes-clinical-deid/src/main/java/org/apache/ctakes/deid/SimpleDeidEntityComparator.java

Modified: ctakes/sandbox/ctakes-clinical-deid/src/main/java/org/apache/ctakes/deid/I2B2DeidCollectionReader.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/src/main/java/org/apache/ctakes/deid/I2B2DeidCollectionReader.java?rev=1746271&r1=1746270&r2=1746271&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/src/main/java/org/apache/ctakes/deid/I2B2DeidCollectionReader.java (original)
+++ ctakes/sandbox/ctakes-clinical-deid/src/main/java/org/apache/ctakes/deid/I2B2DeidCollectionReader.java Tue May 31 13:03:46 2016
@@ -1,366 +1,183 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.ctakes.deid;
-
-import java.io.File;
-import java.io.IOException;
-import java.util.Collection;
-import java.util.Iterator;
-import java.util.List;
-
-import javax.xml.bind.JAXBContext;
-import javax.xml.bind.Unmarshaller;
-
-import org.apache.commons.io.FileUtils;
-import org.apache.commons.lang3.StringUtils;
-import org.apache.ctakes.deid.i2b2.DeIdi2B2;
-import org.apache.ctakes.deid.i2b2.DeIdi2B2.TAGS;
-import org.apache.ctakes.deid.i2b2.DeIdi2B2.TAGS.AGE;
-import org.apache.ctakes.deid.i2b2.DeIdi2B2.TAGS.CONTACT;
-import org.apache.ctakes.deid.i2b2.DeIdi2B2.TAGS.DATE;
-import org.apache.ctakes.deid.i2b2.DeIdi2B2.TAGS.LOCATION;
-import org.apache.ctakes.deid.i2b2.DeIdi2B2.TAGS.NAME;
-import org.apache.ctakes.deid.i2b2.DeIdi2B2.TAGS.PROFESSION;
-import org.apache.ctakes.deid.type.Age;
-import org.apache.ctakes.deid.type.Date;
-import org.apache.ctakes.deid.type.DeidEntity;
-import org.apache.ctakes.deid.type.Location;
-import org.apache.ctakes.deid.type.Name;
-import org.apache.ctakes.deid.type.Profession;
-import org.apache.ctakes.typesystem.type.structured.DocumentID;
-import org.apache.uima.UimaContext;
-import org.apache.uima.cas.CASException;
-import org.apache.uima.collection.CollectionException;
-import org.apache.uima.fit.component.JCasCollectionReader_ImplBase;
-import org.apache.uima.fit.descriptor.ConfigurationParameter;
-import org.apache.uima.jcas.JCas;
-import org.apache.uima.resource.ResourceInitializationException;
-import org.apache.uima.util.Progress;
-import org.apache.uima.util.ProgressImpl;
-
-public class I2B2DeidCollectionReader extends JCasCollectionReader_ImplBase {
-
-  public static final String PARAM_INPUT_DIRECTORY = "inputDirectory";
-
-  @ConfigurationParameter(name = PARAM_INPUT_DIRECTORY, mandatory = true)
-  private File inputDirectory;
-
-  public static final String PARAM_FILE_EXTENSIONS = "fileExtensions";
-
-  @ConfigurationParameter(name = PARAM_FILE_EXTENSIONS, mandatory = true, defaultValue = "xml")
-  private String[] fileExtensions;
-
-  public static final String PARAM_GOLD_VIEW = "goldView";
-
-  @ConfigurationParameter(name = PARAM_GOLD_VIEW, mandatory = true, defaultValue = "_InitialView")
-  private String goldView;
-
-  private Iterator<File> iterator;
-
-  private int current = 0;
-
-  private int overall = 0;
-
-  @Override
-  public void initialize(UimaContext context) throws ResourceInitializationException {
-    super.initialize(context);
-    Collection<File> files = FileUtils.listFiles(inputDirectory, fileExtensions, true);
-    overall = files.size();
-    iterator = files.iterator();
-  }
-
-  @Override
-  public void getNext(JCas jcas) throws IOException, CollectionException {
-    File currentFile = iterator.next();
-    current++;
-
-    DeIdi2B2 doc = null;
-
-    try {
-      JAXBContext jaxbContext = JAXBContext.newInstance(DeIdi2B2.class);
-      Unmarshaller jaxbUnmarshaller = jaxbContext.createUnmarshaller();
-      doc = (DeIdi2B2) jaxbUnmarshaller.unmarshal(currentFile);
-    } catch (Exception e) {
-      throw new CollectionException(e);
-    }
-
-    jcas.setDocumentText(doc.getTEXT());
-    addDocumentId(jcas, currentFile);
-
-    if (!StringUtils.equals(goldView, jcas.getViewName())) {
-      // create additional gold view
-      try {
-        jcas = jcas.createView(goldView);
-        jcas.setDocumentText(doc.getTEXT());
-        addDocumentId(jcas, currentFile);
-      } catch (CASException e) {
-        throw new CollectionException(e);
-      }
-    }
-
-    TAGS tags = doc.getTAGS();
-    List<Object> tagElements = tags.getDATEOrAGEOrNAME();
-    for (Object object : tagElements) {
-      createDeidEntity(jcas, object);
-    }
-  }
-
-  private void addDocumentId(JCas jcas, File currentFile) {
-    DocumentID docId = new DocumentID(jcas);
-    docId.setDocumentID(currentFile.getName());
-    docId.addToIndexes();
-  }
-
-  @Override
-  public boolean hasNext() throws IOException, CollectionException {
-    return iterator.hasNext();
-  }
-
-  @Override
-  public Progress[] getProgress() {
-    return new Progress[] { new ProgressImpl(current, overall, Progress.ENTITIES) };
-  }
-
-  private void createDeidEntity(JCas jcas, Object object) {
-    DeidEntity entity = null;
-    if (object instanceof AGE) {
-      AGE element = (AGE) object;
-      entity = new Age(jcas, element.getStart().intValue(), element.getEnd().intValue());
-      entity.setId(element.getId());
-      entity.setEntityType(element.getTYPE());
-      entity.setComment(element.getComment());
-    } else if (object instanceof DATE) {
-      DATE element = (DATE) object;
-      entity = new Date(jcas, element.getStart().intValue(), element.getEnd().intValue());
-      entity.setId(element.getId());
-      entity.setEntityType(element.getTYPE());
-      entity.setComment(element.getComment());
-    } else if (object instanceof LOCATION) {
-      LOCATION element = (LOCATION) object;
-      entity = new Location(jcas, element.getStart().intValue(), element.getEnd().intValue());
-      entity.setId(element.getId());
-      entity.setEntityType(element.getTYPE());
-      entity.setComment(element.getComment());
-    } else if (object instanceof NAME) {
-      NAME element = (NAME) object;
-      entity = new Name(jcas, element.getStart().intValue(), element.getEnd().intValue());
-      entity.setId(element.getId());
-      entity.setEntityType(element.getTYPE());
-      entity.setComment(element.getComment());
-    } else if (object instanceof CONTACT) {
-      CONTACT element = (CONTACT) object;
-      entity = new Name(jcas, element.getStart().intValue(), element.getEnd().intValue());
-      entity.setId(element.getId());
-      entity.setEntityType(element.getTYPE());
-      entity.setComment(element.getComment());
-    } else if (object instanceof PROFESSION) {
-      PROFESSION element = (PROFESSION) object;
-      entity = new Profession(jcas, element.getStart().intValue(), element.getEnd().intValue());
-      entity.setId(element.getId());
-      entity.setEntityType(element.getTYPE());
-      entity.setComment(element.getComment());
-    }
-    entity.addToIndexes();
-  }
-
-}
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.ctakes.deid;
-
-import java.io.File;
-import java.io.IOException;
-import java.util.Collection;
-import java.util.Iterator;
-import java.util.List;
-
-import javax.xml.bind.JAXBContext;
-import javax.xml.bind.Unmarshaller;
-
-import org.apache.commons.io.FileUtils;
-import org.apache.commons.lang3.StringUtils;
-import org.apache.ctakes.deid.i2b2.DeIdi2B2;
-import org.apache.ctakes.deid.i2b2.DeIdi2B2.TAGS;
-import org.apache.ctakes.deid.i2b2.DeIdi2B2.TAGS.AGE;
-import org.apache.ctakes.deid.i2b2.DeIdi2B2.TAGS.CONTACT;
-import org.apache.ctakes.deid.i2b2.DeIdi2B2.TAGS.DATE;
-import org.apache.ctakes.deid.i2b2.DeIdi2B2.TAGS.LOCATION;
-import org.apache.ctakes.deid.i2b2.DeIdi2B2.TAGS.NAME;
-import org.apache.ctakes.deid.i2b2.DeIdi2B2.TAGS.PROFESSION;
-import org.apache.ctakes.deid.type.Age;
-import org.apache.ctakes.deid.type.Date;
-import org.apache.ctakes.deid.type.DeidEntity;
-import org.apache.ctakes.deid.type.Location;
-import org.apache.ctakes.deid.type.Name;
-import org.apache.ctakes.deid.type.Profession;
-import org.apache.ctakes.typesystem.type.structured.DocumentID;
-import org.apache.uima.UimaContext;
-import org.apache.uima.cas.CASException;
-import org.apache.uima.collection.CollectionException;
-import org.apache.uima.fit.component.JCasCollectionReader_ImplBase;
-import org.apache.uima.fit.descriptor.ConfigurationParameter;
-import org.apache.uima.jcas.JCas;
-import org.apache.uima.resource.ResourceInitializationException;
-import org.apache.uima.util.Progress;
-import org.apache.uima.util.ProgressImpl;
-
-public class I2B2DeidCollectionReader extends JCasCollectionReader_ImplBase {
-
-  public static final String PARAM_INPUT_DIRECTORY = "inputDirectory";
-
-  @ConfigurationParameter(name = PARAM_INPUT_DIRECTORY, mandatory = true)
-  private File inputDirectory;
-
-  public static final String PARAM_FILE_EXTENSIONS = "fileExtensions";
-
-  @ConfigurationParameter(name = PARAM_FILE_EXTENSIONS, mandatory = true, defaultValue = "xml")
-  private String[] fileExtensions;
-
-  public static final String PARAM_GOLD_VIEW = "goldView";
-
-  @ConfigurationParameter(name = PARAM_GOLD_VIEW, mandatory = true, defaultValue = "_InitialView")
-  private String goldView;
-
-  private Iterator<File> iterator;
-
-  private int current = 0;
-
-  private int overall = 0;
-
-  @Override
-  public void initialize(UimaContext context) throws ResourceInitializationException {
-    super.initialize(context);
-    Collection<File> files = FileUtils.listFiles(inputDirectory, fileExtensions, true);
-    overall = files.size();
-    iterator = files.iterator();
-  }
-
-  @Override
-  public void getNext(JCas jcas) throws IOException, CollectionException {
-    File currentFile = iterator.next();
-    current++;
-
-    DeIdi2B2 doc = null;
-
-    try {
-      JAXBContext jaxbContext = JAXBContext.newInstance(DeIdi2B2.class);
-      Unmarshaller jaxbUnmarshaller = jaxbContext.createUnmarshaller();
-      doc = (DeIdi2B2) jaxbUnmarshaller.unmarshal(currentFile);
-    } catch (Exception e) {
-      throw new CollectionException(e);
-    }
-
-    jcas.setDocumentText(doc.getTEXT());
-    addDocumentId(jcas, currentFile);
-
-    if (!StringUtils.equals(goldView, jcas.getViewName())) {
-      // create additional gold view
-      try {
-        jcas = jcas.createView(goldView);
-        jcas.setDocumentText(doc.getTEXT());
-        addDocumentId(jcas, currentFile);
-      } catch (CASException e) {
-        throw new CollectionException(e);
-      }
-    }
-
-    TAGS tags = doc.getTAGS();
-    List<Object> tagElements = tags.getDATEOrAGEOrNAME();
-    for (Object object : tagElements) {
-      createDeidEntity(jcas, object);
-    }
-  }
-
-  private void addDocumentId(JCas jcas, File currentFile) {
-    DocumentID docId = new DocumentID(jcas);
-    docId.setDocumentID(currentFile.getName());
-    docId.addToIndexes();
-  }
-
-  @Override
-  public boolean hasNext() throws IOException, CollectionException {
-    return iterator.hasNext();
-  }
-
-  @Override
-  public Progress[] getProgress() {
-    return new Progress[] { new ProgressImpl(current, overall, Progress.ENTITIES) };
-  }
-
-  private void createDeidEntity(JCas jcas, Object object) {
-    DeidEntity entity = null;
-    if (object instanceof AGE) {
-      AGE element = (AGE) object;
-      entity = new Age(jcas, element.getStart().intValue(), element.getEnd().intValue());
-      entity.setId(element.getId());
-      entity.setEntityType(element.getTYPE());
-      entity.setComment(element.getComment());
-    } else if (object instanceof DATE) {
-      DATE element = (DATE) object;
-      entity = new Date(jcas, element.getStart().intValue(), element.getEnd().intValue());
-      entity.setId(element.getId());
-      entity.setEntityType(element.getTYPE());
-      entity.setComment(element.getComment());
-    } else if (object instanceof LOCATION) {
-      LOCATION element = (LOCATION) object;
-      entity = new Location(jcas, element.getStart().intValue(), element.getEnd().intValue());
-      entity.setId(element.getId());
-      entity.setEntityType(element.getTYPE());
-      entity.setComment(element.getComment());
-    } else if (object instanceof NAME) {
-      NAME element = (NAME) object;
-      entity = new Name(jcas, element.getStart().intValue(), element.getEnd().intValue());
-      entity.setId(element.getId());
-      entity.setEntityType(element.getTYPE());
-      entity.setComment(element.getComment());
-    } else if (object instanceof CONTACT) {
-      CONTACT element = (CONTACT) object;
-      entity = new Name(jcas, element.getStart().intValue(), element.getEnd().intValue());
-      entity.setId(element.getId());
-      entity.setEntityType(element.getTYPE());
-      entity.setComment(element.getComment());
-    } else if (object instanceof PROFESSION) {
-      PROFESSION element = (PROFESSION) object;
-      entity = new Profession(jcas, element.getStart().intValue(), element.getEnd().intValue());
-      entity.setId(element.getId());
-      entity.setEntityType(element.getTYPE());
-      entity.setComment(element.getComment());
-    }
-    entity.addToIndexes();
-  }
-
-}
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.ctakes.deid;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Collection;
+import java.util.Iterator;
+import java.util.List;
+
+import javax.xml.bind.JAXBContext;
+import javax.xml.bind.Unmarshaller;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.ctakes.deid.i2b2.DeIdi2B2;
+import org.apache.ctakes.deid.i2b2.DeIdi2B2.TAGS;
+import org.apache.ctakes.deid.i2b2.DeIdi2B2.TAGS.AGE;
+import org.apache.ctakes.deid.i2b2.DeIdi2B2.TAGS.CONTACT;
+import org.apache.ctakes.deid.i2b2.DeIdi2B2.TAGS.DATE;
+import org.apache.ctakes.deid.i2b2.DeIdi2B2.TAGS.LOCATION;
+import org.apache.ctakes.deid.i2b2.DeIdi2B2.TAGS.NAME;
+import org.apache.ctakes.deid.i2b2.DeIdi2B2.TAGS.PROFESSION;
+import org.apache.ctakes.deid.type.Age;
+import org.apache.ctakes.deid.type.Date;
+import org.apache.ctakes.deid.type.DeidEntity;
+import org.apache.ctakes.deid.type.Location;
+import org.apache.ctakes.deid.type.Name;
+import org.apache.ctakes.deid.type.Profession;
+import org.apache.ctakes.typesystem.type.structured.DocumentID;
+import org.apache.uima.UimaContext;
+import org.apache.uima.cas.CASException;
+import org.apache.uima.collection.CollectionException;
+import org.apache.uima.fit.component.JCasCollectionReader_ImplBase;
+import org.apache.uima.fit.descriptor.ConfigurationParameter;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.util.Progress;
+import org.apache.uima.util.ProgressImpl;
+
+public class I2B2DeidCollectionReader extends JCasCollectionReader_ImplBase {
+
+  public static final String PARAM_INPUT_DIRECTORY = "inputDirectory";
+
+  @ConfigurationParameter(name = PARAM_INPUT_DIRECTORY, mandatory = true)
+  private File inputDirectory;
+
+  public static final String PARAM_FILE_EXTENSIONS = "fileExtensions";
+
+  @ConfigurationParameter(name = PARAM_FILE_EXTENSIONS, mandatory = true, defaultValue = "xml")
+  private String[] fileExtensions;
+
+  public static final String PARAM_GOLD_VIEW = "goldView";
+
+  @ConfigurationParameter(name = PARAM_GOLD_VIEW, mandatory = true, defaultValue = "_InitialView")
+  private String goldView;
+
+  private Iterator<File> iterator;
+
+  private int current = 0;
+
+  private int overall = 0;
+
+  @Override
+  public void initialize(UimaContext context) throws ResourceInitializationException {
+    super.initialize(context);
+    Collection<File> files = FileUtils.listFiles(inputDirectory, fileExtensions, true);
+    overall = files.size();
+    iterator = files.iterator();
+  }
+
+  @Override
+  public void getNext(JCas jcas) throws IOException, CollectionException {
+    File currentFile = iterator.next();
+    current++;
+
+    DeIdi2B2 doc = null;
+
+    try {
+      JAXBContext jaxbContext = JAXBContext.newInstance(DeIdi2B2.class);
+      Unmarshaller jaxbUnmarshaller = jaxbContext.createUnmarshaller();
+      doc = (DeIdi2B2) jaxbUnmarshaller.unmarshal(currentFile);
+    } catch (Exception e) {
+      throw new CollectionException(e);
+    }
+
+    jcas.setDocumentText(doc.getTEXT());
+    addDocumentId(jcas, currentFile);
+
+    if (!StringUtils.equals(goldView, jcas.getViewName())) {
+      // create additional gold view
+      try {
+        jcas = jcas.createView(goldView);
+        jcas.setDocumentText(doc.getTEXT());
+        addDocumentId(jcas, currentFile);
+      } catch (CASException e) {
+        throw new CollectionException(e);
+      }
+    }
+
+    TAGS tags = doc.getTAGS();
+    List<Object> tagElements = tags.getDATEOrAGEOrNAME();
+    for (Object object : tagElements) {
+      createDeidEntity(jcas, object);
+    }
+  }
+
+  private void addDocumentId(JCas jcas, File currentFile) {
+    DocumentID docId = new DocumentID(jcas);
+    docId.setDocumentID(currentFile.getName());
+    docId.addToIndexes();
+  }
+
+  @Override
+  public boolean hasNext() throws IOException, CollectionException {
+    return iterator.hasNext();
+  }
+
+  @Override
+  public Progress[] getProgress() {
+    return new Progress[] { new ProgressImpl(current, overall, Progress.ENTITIES) };
+  }
+
+  private void createDeidEntity(JCas jcas, Object object) {
+    DeidEntity entity = null;
+    if (object instanceof AGE) {
+      AGE element = (AGE) object;
+      entity = new Age(jcas, element.getStart().intValue(), element.getEnd().intValue());
+      entity.setId(element.getId());
+      entity.setEntityType(element.getTYPE());
+      entity.setComment(element.getComment());
+    } else if (object instanceof DATE) {
+      DATE element = (DATE) object;
+      entity = new Date(jcas, element.getStart().intValue(), element.getEnd().intValue());
+      entity.setId(element.getId());
+      entity.setEntityType(element.getTYPE());
+      entity.setComment(element.getComment());
+    } else if (object instanceof LOCATION) {
+      LOCATION element = (LOCATION) object;
+      entity = new Location(jcas, element.getStart().intValue(), element.getEnd().intValue());
+      entity.setId(element.getId());
+      entity.setEntityType(element.getTYPE());
+      entity.setComment(element.getComment());
+    } else if (object instanceof NAME) {
+      NAME element = (NAME) object;
+      entity = new Name(jcas, element.getStart().intValue(), element.getEnd().intValue());
+      entity.setId(element.getId());
+      entity.setEntityType(element.getTYPE());
+      entity.setComment(element.getComment());
+    } else if (object instanceof CONTACT) {
+      CONTACT element = (CONTACT) object;
+      entity = new Name(jcas, element.getStart().intValue(), element.getEnd().intValue());
+      entity.setId(element.getId());
+      entity.setEntityType(element.getTYPE());
+      entity.setComment(element.getComment());
+    } else if (object instanceof PROFESSION) {
+      PROFESSION element = (PROFESSION) object;
+      entity = new Profession(jcas, element.getStart().intValue(), element.getEnd().intValue());
+      entity.setId(element.getId());
+      entity.setEntityType(element.getTYPE());
+      entity.setComment(element.getComment());
+    }
+    entity.addToIndexes();
+  }
+
+}

Modified: ctakes/sandbox/ctakes-clinical-deid/src/main/java/org/apache/ctakes/deid/SimpleDeidEntityComparator.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-clinical-deid/src/main/java/org/apache/ctakes/deid/SimpleDeidEntityComparator.java?rev=1746271&r1=1746270&r2=1746271&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-clinical-deid/src/main/java/org/apache/ctakes/deid/SimpleDeidEntityComparator.java (original)
+++ ctakes/sandbox/ctakes-clinical-deid/src/main/java/org/apache/ctakes/deid/SimpleDeidEntityComparator.java Tue May 31 13:03:46 2016
@@ -195,200 +195,3 @@ public class SimpleDeidEntityComparator
   }
   
 }
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.ctakes.deid;
-
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.Locale;
-import java.util.Map;
-import java.util.TreeMap;
-import java.util.TreeSet;
-
-import org.apache.ctakes.core.util.DocumentIDAnnotationUtil;
-import org.apache.ctakes.deid.type.DeidEntity;
-import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
-import org.apache.uima.cas.CASException;
-import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
-import org.apache.uima.fit.descriptor.ConfigurationParameter;
-import org.apache.uima.fit.util.JCasUtil;
-import org.apache.uima.jcas.JCas;
-import org.apache.uima.ruta.type.FalseNegative;
-import org.apache.uima.ruta.type.FalsePositive;
-import org.apache.uima.ruta.type.TruePositive;
-import org.apache.uima.util.CasCopier;
-
-public class SimpleDeidEntityComparator extends JCasAnnotator_ImplBase {
-
-  public static final String PARAM_GOLD_VIEW = "goldView";
-
-  @ConfigurationParameter(name = PARAM_GOLD_VIEW, mandatory = true, defaultValue = "gold")
-  private String goldView;
-
-  public static final String PARAM_CREATE_RUTA_EVAL_ANNOTATIONS = "createRutaEvalAnnotations";
-
-  @ConfigurationParameter(name = PARAM_CREATE_RUTA_EVAL_ANNOTATIONS, mandatory = true, defaultValue = "false")
-  private Boolean createRutaEvalAnnotations;
-
-  
-  private Map<String, Integer> type2tp = new TreeMap<>();
-  private Map<String, Integer> type2fp = new TreeMap<>();
-  private Map<String, Integer> type2fn = new TreeMap<>();
-  
-  @Override
-  public void process(JCas jcas) throws AnalysisEngineProcessException {
-
-    String documentID = DocumentIDAnnotationUtil.getDocumentID(jcas);
-
-    JCas processJCas = jcas;
-    JCas goldJCas;
-    try {
-      goldJCas = jcas.getView(goldView);
-    } catch (CASException e) {
-      throw new AnalysisEngineProcessException(e);
-    }
-
-    Collection<DeidEntity> allGold = JCasUtil.select(goldJCas, DeidEntity.class);
-    Collection<DeidEntity> allProcess = JCasUtil.select(processJCas, DeidEntity.class);
-
-    Collection<DeidEntity> tp = new ArrayList<>();
-    Collection<DeidEntity> fp = new ArrayList<>();
-    Collection<DeidEntity> fn = new ArrayList<>();
-
-    CasCopier cc = new CasCopier(goldJCas.getCas(), processJCas.getCas());
-    
-    for (DeidEntity goldAnnotation : allGold) {
-      boolean found = false;
-      for (DeidEntity processAnnotation : allProcess) {
-        if (equals(goldAnnotation, processAnnotation)) {
-          tp.add(processAnnotation);
-          inc(type2tp, processAnnotation);
-          found = true;
-          break;
-        }
-      }
-      if (!found) {
-        DeidEntity copyFs = (DeidEntity) cc.copyFs(goldAnnotation);
-        fn.add(copyFs);
-        inc(type2fn, copyFs);
-      }
-    }
-
-    for (DeidEntity processAnnotation : allProcess) {
-      boolean found = false;
-      for (DeidEntity goldAnnotation : allGold) {
-        if (equals(goldAnnotation, processAnnotation)) {
-          found = true;
-          break;
-        }
-      }
-      if (!found) {
-        fp.add(processAnnotation);
-        inc(type2fp, processAnnotation);
-      }
-    }
-
-    printResult(documentID, tp.size(), fp.size(), fn.size());
-
-    if (createRutaEvalAnnotations) {
-      for (DeidEntity each : tp) {
-        TruePositive a = new TruePositive(processJCas, each.getBegin(), each.getEnd());
-        a.setOriginal(each);
-        a.addToIndexes();
-      }
-      for (DeidEntity each : fp) {
-        FalsePositive a = new FalsePositive(processJCas, each.getBegin(), each.getEnd());
-        a.setOriginal(each);
-        a.addToIndexes();
-      }
-      for (DeidEntity each : fn) {
-        FalseNegative a = new FalseNegative(processJCas, each.getBegin(), each.getEnd());
-        a.setOriginal(each);
-        a.addToIndexes();
-      }
-    }
-  }
-
- 
-
-  @Override
-  public void collectionProcessComplete() throws AnalysisEngineProcessException {
-    super.collectionProcessComplete();
-    int tps = 0;
-    int fps = 0;
-    int fns = 0;
-    Collection<String> types = new TreeSet<>();
-    types.addAll(type2tp.keySet());
-    types.addAll(type2fp.keySet());
-    types.addAll(type2fn.keySet());
-    
-    for (String string : types) {
-      int tp = type2tp.get(string) == null ? 0 : type2tp.get(string);
-      int fp = type2fp.get(string) == null ? 0 : type2fp.get(string);
-      int fn = type2fn.get(string) == null ? 0 : type2fn.get(string);
-      tps += tp;
-      fps += fp;
-      fns += fn;
-      printResult(string, tp, fp, fn);
-    }
-    
-    printResult("OVERALL", tps, fps, fns);
-  }
-
-  private static boolean equals(DeidEntity goldAnnotation, DeidEntity processAnnotation) {
-    boolean sameType = goldAnnotation.getType().getName()
-            .equals(processAnnotation.getType().getName());
-    boolean sameBegin = goldAnnotation.getBegin() == processAnnotation.getBegin();
-    boolean sameEnd = goldAnnotation.getEnd() == processAnnotation.getEnd();
-    boolean sameEntityType = goldAnnotation.getEntityType()
-            .equals(processAnnotation.getEntityType());
-    return sameType && sameBegin && sameEnd && sameEntityType;
-  }
-
-  private static void printResult(String doc, double tpCount, double fpCount, double fnCount) {
-    double precision = 1;
-    if (tpCount + fpCount != 0) {
-      precision = tpCount / (tpCount + fpCount);
-    }
-    double recall = 1;
-    if (tpCount + fnCount != 0) {
-      recall = tpCount / (tpCount + fnCount);
-    }
-    double f1 = 0;
-    if(precision + recall != 0) {
-      f1 = 2 * (precision * recall) / (precision + recall);
-    }
-
-    System.out.printf(Locale.ENGLISH,"%-10s\tp: %.2f\tr: %.2f\tf1: %.2f", doc, precision, recall, f1);
-    System.out.println();
-  }
-
-  private void inc(Map<String, Integer> map, DeidEntity annotation) {
-    String name = annotation.getType().getShortName();
-    Integer count = map.get(name);
-    if(count == null) {
-      map.put(name, 1);
-    } else {
-      map.put(name, count + 1);
-    }
-  }
-  
-}



Mime
View raw message