ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From c...@apache.org
Subject svn commit: r1451606 - in /incubator/ctakes/trunk/ctakes-temporal/src/main: java/org/apache/ctakes/temporal/ae/feature/IdentifiedAnnotationFeatureExtractor.java resources/ resources/eventDict.txt
Date Fri, 01 Mar 2013 14:50:35 GMT
Author: clin
Date: Fri Mar  1 14:50:35 2013
New Revision: 1451606

URL: http://svn.apache.org/r1451606
Log:
Add a feature extractor to tell if a token is inside an identified annotation and if the identified
annotation is one of the known events among gold standards.

Added:
    incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/IdentifiedAnnotationFeatureExtractor.java
  (with props)
    incubator/ctakes/trunk/ctakes-temporal/src/main/resources/
    incubator/ctakes/trunk/ctakes-temporal/src/main/resources/eventDict.txt   (with props)

Added: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/IdentifiedAnnotationFeatureExtractor.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/IdentifiedAnnotationFeatureExtractor.java?rev=1451606&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/IdentifiedAnnotationFeatureExtractor.java
(added)
+++ incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/IdentifiedAnnotationFeatureExtractor.java
Fri Mar  1 14:50:35 2013
@@ -0,0 +1,144 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.temporal.ae.feature;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.charset.Charset;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.logging.Logger;
+
+import org.apache.ctakes.typesystem.type.syntax.BaseToken;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.cleartk.classifier.Feature;
+import org.cleartk.classifier.feature.extractor.CleartkExtractorException;
+import org.cleartk.classifier.feature.extractor.simple.SimpleFeatureExtractor;
+import org.uimafit.util.JCasUtil;
+
+import com.google.common.io.Files;
+import com.google.common.io.LineProcessor;
+
+/**
+ * Feature extractor for a single token annotation.
+ * Given a token, check whether it is covered by any identified annotation; if so,
+ * emit a "TokenInside" feature, then check whether the lower-cased covered text of
+ * one of those identified annotations is a key of the supplied dictionary map (a
+ * "known event") and, if so, emit one more feature under this extractor's name.
+ * Only key presence is used: the double[] weights stored in the map are not
+ * attached to any feature by the code in this class.
+ * @author Chen Lin
+ * 2/28/2013
+ *
+ */
+public class IdentifiedAnnotationFeatureExtractor implements SimpleFeatureExtractor {
+
+  private String name; // feature name used for the "is_A_KnownEvent" feature
+
+  private Map<String, double[]> textDoublesMap; // dictionary: text -> values; extract() only tests key presence
+
+  private double[] meanValues; // per-column mean over all map entries; set in the constructor, not read elsewhere in this class
+  
+  public static Map<String, double[]> parseTextDoublesMap(File file, Charset charset)
throws IOException { // loads a "key,v1,v2,..." file into a key -> double[] map
+    return Files.readLines(file, charset, new StringToDoublesProcessor()); // each line is handed to processLine below
+  }
+
+  static class StringToDoublesProcessor implements LineProcessor<Map<String, double[]>>
{ // parses comma-separated lines: first part is the key, remaining parts are doubles
+    private Logger logger = Logger.getLogger(this.getClass().getName());
+
+    private Map<String, double[]> result = new HashMap<String, double[]>(); // accumulates key -> values across lines
+
+    private int length = -1; // part count of the first line seen; -1 until then
+
+    @Override
+    public Map<String, double[]> getResult() {
+      return this.result; // the live accumulated map, not a copy
+    }
+
+    @Override
+    public boolean processLine(String line) throws IOException {
+      String[] parts = line.trim().split(",");
+      String key = parts[0];
+      int partsOffset = 0; // always 0 here, so it does not change the index computed below
+      if (this.length == -1) { // first line fixes the expected number of parts
+        this.length = parts.length;
+      } else if (parts.length != this.length) { // later line with a different part count
+        String message = "expected %d parts, found %d, skipping line '%s'";
+        this.logger.warning(String.format(message, this.length, parts.length, line));
+        return true; // nothing is stored for this line
+      }
+      double[] values = new double[parts.length - 1]; // every part except the key
+      for (int i = 0; i < values.length; ++i) {
+        values[i] = Double.parseDouble(parts[i + 1 + partsOffset]); // NumberFormatException propagates if a part is not numeric
+      }
+      this.result.put(key, values); // a repeated key replaces the earlier entry
+      return true;
+    }
+  }
+
+  public IdentifiedAnnotationFeatureExtractor(String name, Map<String, double[]> textDoublesMap)
{ // stores both arguments and precomputes meanValues; throws IllegalArgumentException on an empty map
+    super();
+    this.name = name;
+    this.textDoublesMap = textDoublesMap; // kept by reference, no defensive copy
+    int nMapEntries = this.textDoublesMap.size();
+    if (nMapEntries == 0) {
+      throw new IllegalArgumentException("textDoublesMap cannot be empty");
+    }
+    int nValues = textDoublesMap.entrySet().iterator().next().getValue().length; // column count taken from whichever entry iterates first
+    this.meanValues = new double[nValues];
+    for (double[] values : textDoublesMap.values()) { // first pass: column-wise sums
+      for (int i = 0; i < values.length; ++i) {
+        this.meanValues[i] += values[i]; // NOTE(review): an array longer than nValues would index past meanValues here
+      }
+    }
+    for (int i = 0; i < this.meanValues.length; ++i) { // second pass: sums -> means
+    	this.meanValues[i] /= nMapEntries;
+    }
+  }
+
+  @Override
+  public List<Feature> extract(JCas view, Annotation annotation) throws CleartkExtractorException
{ // returns 0, 1 or 2 features for the given token annotation
+
+	  Map<BaseToken, Collection<IdentifiedAnnotation>> coveringMap =
+			  JCasUtil.indexCovering(view, BaseToken.class, IdentifiedAnnotation.class); // built on every call; presumably token -> covering annotations (per uimaFIT), verify
+
+	  BaseToken targetTokenAnnotation = (BaseToken)annotation; // unchecked cast: ClassCastException if annotation is not a BaseToken
+
+	  Collection<IdentifiedAnnotation> idannoList = coveringMap.get(targetTokenAnnotation);
+
+	  ArrayList<Feature> features = new ArrayList<Feature>();
+
+	  if (idannoList != null && !idannoList.isEmpty()) // otherwise the returned list stays empty
+	  {
+		  features.add(new Feature("TokenInside","A_IdentifiedAnnotation")); // added once, regardless of how many annotations were found
+		  for (IdentifiedAnnotation ida : idannoList){
+			  //check if the ida is a known event: look up its lower-cased covered text in the dictionary
+			  double[] values = this.textDoublesMap.get(ida.getCoveredText().toLowerCase()); // toLowerCase() with no Locale uses the JVM default locale
+			  if(values != null){
+				  features.add(new Feature(this.name, "is_A_KnownEvent")); // the looked-up values themselves are not used
+				  break; // first match is enough, so this feature is added at most once
+			  }
+		  }
+	  }
+
+
+	  return features;
+  }
+
+}

Propchange: incubator/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/IdentifiedAnnotationFeatureExtractor.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/ctakes/trunk/ctakes-temporal/src/main/resources/eventDict.txt
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-temporal/src/main/resources/eventDict.txt?rev=1451606&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-temporal/src/main/resources/eventDict.txt (added)
+++ incubator/ctakes/trunk/ctakes-temporal/src/main/resources/eventDict.txt Fri Mar  1 14:50:35
2013
@@ -0,0 +1,91 @@
+ibuprofen,1
+hypaque,1
+tenormin,1
+celexa,1
+tylenol,1
+betamethasone dipropionate,1
+amoxicillin,1
+finasteride,1
+aspirin,1
+zetia,1
+acetaminophen,1
+maxzide,1
+furosemide,1
+neurontin,1
+cardizem la,1
+cephalexin,1
+prednisone,1
+methylprednisolone,1
+cardizem,1
+fosamax,1
+coumadin,1
+lipitor,1
+folic acid,1
+ezetimibe,1
+xeloda,1
+simvastatin,1
+imodium,1
+lisinopril,1
+penicillin,1
+lantus,1
+vibramycin,1
+keflex,1
+norvasc,1
+xanax,1
+keppra,1
+zestril,1
+proscar,1
+cardura,1
+insulin,1
+hydrochlorothiazide,1
+nitroglycerin,1
+spiriva,1
+synthroid,1
+prilosec,1
+penicillin v potassium,1
+oxaliplatin,1
+zocor,1
+medrol,1
+coreg,1
+cozaar,1
+metoprolol tartrate,1
+progesterone,1
+levofloxacin,1
+lopressor,1
+amoxil,1
+trimethoprim,1
+alprazolam,1
+lasix,1
+albuterol,1
+vicodin,1
+omeprazole,1
+levaquin,1
+fluorouracil,1
+doxycycline,1
+carvedilol,1
+miralax,1
+levoxyl,1
+remeron,1
+penicillin v,1
+roxicodone,1
+nitrofurantoin,1
+triamterene,1
+digoxin,1
+vitamin a,1
+atenolol,1
+motrin,1
+gabapentin,1
+flagyl,1
+cipro,1
+patanol,1
+capecitabine,1
+today,1
+vitamin d,1
+lovenox,1
+fragmin,1
+mirtazapine,1
+sulfamethoxazole,1
+lansoprazole,1
+prevacid,1
+betamethasone,1
+piperacillin,1
\ No newline at end of file

Propchange: incubator/ctakes/trunk/ctakes-temporal/src/main/resources/eventDict.txt
------------------------------------------------------------------------------
    svn:mime-type = text/plain



Mime
View raw message