ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From dlig...@apache.org
Subject svn commit: r1562598 - /ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/wsd/pipelines/Analyze.java
Date Wed, 29 Jan 2014 21:21:11 GMT
Author: dligach
Date: Wed Jan 29 21:21:11 2014
New Revision: 1562598

URL: http://svn.apache.org/r1562598
Log:
added cui printer for cui-polysemy analysis

Modified:
    ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/wsd/pipelines/Analyze.java

Modified: ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/wsd/pipelines/Analyze.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/wsd/pipelines/Analyze.java?rev=1562598&r1=1562597&r2=1562598&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/wsd/pipelines/Analyze.java (original)
+++ ctakes/sandbox/ctakes-wsd/src/main/java/org/apache/ctakes/wsd/pipelines/Analyze.java Wed Jan 29 21:21:11 2014
@@ -30,13 +30,14 @@ import org.uimafit.util.JCasUtil;
 import com.google.common.collect.Lists;
 
 /**
+ * Generate various data sets for analyzing polysemy.
  * 
  * @author dmitriy dligach
  */
 public class Analyze {
 
   public static final String GOLD_VIEW_NAME = "GoldView";
-  
+
   public static class Options extends Options_ImplBase {
 
     @Option(
@@ -44,16 +45,10 @@ public class Analyze {
         usage = "specify the path to the directory containing the XMI files",
         required = true)
     public File inputDirectory;
-
-    @Option(
-        name = "--output-dir",
-        usage = "specify the path to the output directory",
-        required = false)
-    public File outputDirectory;
   }
-  
+
   public static void main(String[] args) throws Exception {
-    
+
     Options options = new Options();
     options.parseOptions(args);
 
@@ -67,73 +62,103 @@ public class Analyze {
         XMIReader.class,
         XMIReader.PARAM_FILES,
         paths);
-    
-    AnalysisEngine consumer = AnalysisEngineFactory.createPrimitive(Consumer.class);
-        
+
+    AnalysisEngine consumer = AnalysisEngineFactory.createPrimitive(PrintCuis.class);
+
     SimplePipeline.runPipeline(xmiCollectionReader, consumer);
   }
-  
-  public static class Consumer extends JCasAnnotator_ImplBase {
+
+  public static class PrintConceptSemanticTypes extends JCasAnnotator_ImplBase {
 
     @Override
     public void process(JCas jCas) throws AnalysisEngineProcessException {
-      
+
       JCas goldView;                                                                    
                                      
       try {                                                                             
                                      
         goldView = jCas.getView(GOLD_VIEW_NAME);                                        
                                      
       } catch (CASException e) {                                                        
                                      
         throw new AnalysisEngineProcessException(e);                                    
                                      
       }   
-      
+
       for (EventMention mention : Lists.newArrayList(JCasUtil.select(jCas, EventMention.class))) {
         // for some reason in gold begin offset for some mentions is a huge number
         if(mention.getBegin() > jCas.getDocumentText().length()) {
           continue;
         }
-        
+
         String text = mention.getCoveredText().toLowerCase();
         String semanticType = mention.getClass().getSimpleName();
         System.out.format("%s|%s\n", text, semanticType);
       }
-      
+
       for (EntityMention mention : Lists.newArrayList(JCasUtil.select(jCas, EntityMention.class))) {
         // avoid weird crashes
         if(mention.getBegin() > jCas.getDocumentText().length()) {
           continue;
         }
-        
+
         String text = mention.getCoveredText().toLowerCase();
         String semanticType = mention.getClass().getSimpleName();
         System.out.format("%s|%s\n", text, semanticType);
       }
     }
-    
-    /**
-     * Get the CUIs, RxNorm codes, etc.
-     */
-    public static Set<String> getOntologyConceptCodes(IdentifiedAnnotation identifiedAnnotation) {
-      
-      Set<String> codes = new HashSet<String>();
-      
-      FSArray fsArray = identifiedAnnotation.getOntologyConceptArr();
-      if(fsArray == null) {
-        return codes;
+  }
+
+  public static class PrintCuis extends JCasAnnotator_ImplBase {
+
+    @Override
+    public void process(JCas jCas) throws AnalysisEngineProcessException {
+
+      for (EventMention mention : Lists.newArrayList(JCasUtil.select(jCas, EventMention.class))) {
+        if(mention.getBegin() > jCas.getDocumentText().length()) {
+          continue;
+        }
+
+        String text = mention.getCoveredText().toLowerCase();
+        for(String code : getOntologyConceptCodes(mention)) {
+          System.out.format("%s|%s\n", text, code);
+        }
       }
-      
-      for(FeatureStructure featureStructure : fsArray.toArray()) {
-        OntologyConcept ontologyConcept = (OntologyConcept) featureStructure;
-        
-        if(ontologyConcept instanceof UmlsConcept) {
-          UmlsConcept umlsConcept = (UmlsConcept) ontologyConcept;
-          String code = umlsConcept.getCui();
-          codes.add(code);
-        } else { // SNOMED or RxNorm
-          String code = ontologyConcept.getCodingScheme() + ontologyConcept.getCode();
-          codes.add(code);
+
+      for (EntityMention mention : Lists.newArrayList(JCasUtil.select(jCas, EntityMention.class))) {
+        if(mention.getBegin() > jCas.getDocumentText().length()) {
+          continue;
+        }
+
+        String text = mention.getCoveredText().toLowerCase();
+        for(String code : getOntologyConceptCodes(mention)) {
+          System.out.format("%s|%s\n", text, code);
         }
       }
-      
+    }
+  }
+
+  /**
+   * Get the CUIs, RxNorm codes, etc.
+   */
+  public static Set<String> getOntologyConceptCodes(IdentifiedAnnotation identifiedAnnotation) {
+
+    Set<String> codes = new HashSet<String>();
+
+    FSArray fsArray = identifiedAnnotation.getOntologyConceptArr();
+    if(fsArray == null) {
       return codes;
     }
+
+    for(FeatureStructure featureStructure : fsArray.toArray()) {
+      OntologyConcept ontologyConcept = (OntologyConcept) featureStructure;
+
+      if(ontologyConcept instanceof UmlsConcept) {
+        UmlsConcept umlsConcept = (UmlsConcept) ontologyConcept;
+        String code = umlsConcept.getCui();
+        codes.add(code);
+      } else { // SNOMED or RxNorm
+        String code = ontologyConcept.getCodingScheme() + ontologyConcept.getCode();
+        codes.add(code);
+      }
+    }
+
+    return codes;
   }
 }
+



Mime
View raw message