Mailing-List: contact notifications-help@ctakes.apache.org; run by ezmlm
Precedence: bulk
Reply-To: dev@ctakes.apache.org
Date: Wed, 22 Oct 2014 22:19:34 +0000 (UTC)
From: "Pei Chen (JIRA)" <jira@apache.org>
To: notifications@ctakes.apache.org
Message-ID: <JIRA.12743210.1411397808000.318515.1414016374429@Atlassian.JIRA>
In-Reply-To: <JIRA.12743210.1411397808000@Atlassian.JIRA>
References: <JIRA.12743210.1411397808000@Atlassian.JIRA>
 <JIRA.12743210.1411397808446@arcas>
Subject: [jira] [Commented] (CTAKES-314) BigTop/Hadoop cTAKES integration
MIME-Version: 1.0
Content-Type: text/plain; charset=utf-8
Content-Transfer-Encoding: 7bit


    [ https://issues.apache.org/jira/browse/CTAKES-314?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14180655#comment-14180655 ] 

Pei Chen commented on CTAKES-314:
---------------------------------

package sparkapps

import java.text.BreakIterator

import opennlp.tools.postag.POSTagger
import opennlp.tools.sentdetect.{SentenceDetectorME, SentenceModel, SentenceDetector}
import org.apache.ctakes.assertion.medfacts.cleartk.PolarityCleartkAnalysisEngine
import org.apache.ctakes.clinicalpipeline.ClinicalPipelineFactory.{RemoveEnclosedLookupWindows, CopyNPChunksToLookupWindowAnnotations}
import org.apache.ctakes.constituency.parser.ae.ConstituencyParser
import org.apache.ctakes.contexttokenizer.ae.ContextDependentTokenizerAnnotator
import org.apache.ctakes.core.ae.{TokenizerAnnotatorPTB, SimpleSegmentAnnotator}
import org.apache.ctakes.dependency.parser.ae.{ClearNLPSemanticRoleLabelerAE, ClearNLPDependencyParserAE}
import org.apache.ctakes.dependency.parser.ae.ClearNLPDependencyParserAE._
import org.apache.ctakes.dictionary.lookup.ae.UmlsDictionaryLookupAnnotator
import org.apache.ctakes.typesystem.`type`.syntax.BaseToken
import org.apache.ctakes.typesystem.`type`.textsem.IdentifiedAnnotation
import org.apache.uima.analysis_engine.AnalysisEngineDescription
import org.apache.uima.jcas.JCas
import org.cleartk.chunker.Chunker
import org.uimafit.factory.{AnalysisEngineFactory, AggregateBuilder, JCasFactory}
import org.uimafit.pipeline.SimplePipeline
import org.uimafit.util.JCasUtil

import scala.collection.JavaConverters._

/**
 * Created by jay on 10/10/14.
 *
 * Small stand-alone example: builds an aggregate cTAKES analysis engine, runs it over a
 * hard-coded clinical note, and prints every token together with its part-of-speech tag.
 */
object CTakesExample {

  /**
   * Assembles the aggregate analysis engine description used by [[main]].
   *
   * Components are added in this order: segment annotator, sentence detector, PTB tokenizer,
   * context-dependent tokenizer, POS tagger, chunker, the two lookup-window helpers
   * (`CopyNPChunksToLookupWindowAnnotations`, `RemoveEnclosedLookupWindows`) and the
   * polarity analysis engine.
   *
   * @return the aggregate description created by the builder
   */
  def getDefaultPipeline(): AnalysisEngineDescription = {
    // Not sure if useful -- direct OpenNLP sentence detection, kept for reference only:
    //   val modelIn = getClass().getResourceAsStream("en-sent.bin")
    //   val sentenceModel = new SentenceModel(modelIn)
    //   modelIn.close()
    //   val sentenceDetector = new SentenceDetectorME(sentenceModel)

    // The builder reference is never reassigned, so it is a val.
    val builder = new AggregateBuilder

    builder.add(SimpleSegmentAnnotator.createAnnotatorDescription())
    // Fully qualified: the file also imports OpenNLP's SentenceDetector under the same simple name.
    builder.add(org.apache.ctakes.core.ae.SentenceDetector.createAnnotatorDescription())

    builder.add(TokenizerAnnotatorPTB.createAnnotatorDescription())
    builder.add(ContextDependentTokenizerAnnotator.createAnnotatorDescription())

    // Fully qualified: POSTagger and Chunker are also imported from OpenNLP / ClearTK.
    builder.add(org.apache.ctakes.postagger.POSTagger.createAnnotatorDescription())
    builder.add(org.apache.ctakes.chunker.ae.Chunker.createAnnotatorDescription())

    builder.add(AnalysisEngineFactory.createPrimitiveDescription(classOf[CopyNPChunksToLookupWindowAnnotations]))
    builder.add(AnalysisEngineFactory.createPrimitiveDescription(classOf[RemoveEnclosedLookupWindows]))
    // UMLS dictionary lookup is intentionally left disabled in this example:
    //builder.add(UmlsDictionaryLookupAnnotator.createAnnotatorDescription());
    builder.add(PolarityCleartkAnalysisEngine.createAnnotatorDescription())

    // Last expression is the result; no explicit `return` needed.
    builder.createAggregateDescription()
  }

  /**
   * Runs the default pipeline over a fixed sample note and prints one
   * `"<covered text> - <part of speech>"` line per [[BaseToken]] to standard output.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val aed: AnalysisEngineDescription = getDefaultPipeline()

    val jcas: JCas = JCasFactory.createJCas()
    jcas.setDocumentText("The patient is suffering from extreme pain due to shark bite. Recommend continuing use of aspirin, oxycodone, and coumadin. atient denies smoking and chest pain. Patient has no cancer. There is no sign of multiple sclerosis. Continue exercise for obesity and hypertension. ")

    SimplePipeline.runPipeline(jcas, aed)

    // Print out the tokens and parts of speech.
    // JCasUtil.select returns a Java collection; view it as a Scala Iterable instead of
    // driving the Java iterator by hand.
    JCasUtil.select(jcas, classOf[BaseToken]).asScala.foreach { token =>
      println(s"${token.getCoveredText()} - ${token.getPartOfSpeech()}")
    }
  }

}
//============ OUTPUT:
The - DT
patient - NN
is - VBZ
suffering - VBG
from - IN
extreme - JJ
pain - NN
due - IN
to - IN
shark - NN
bite - NN
. - .
Recommend - VBP
continuing - VBG
use - NN
of - IN
aspirin - NN
, - ,
oxycodone - NN
, - ,
and - CC
coumadin - NN
. - .
atient - NN
denies - VBZ
smoking - VBG
and - CC
chest - NN
pain - NN
. - .
Patient - NN
has - VBZ
no - DT
cancer - NN
. - .
There - EX
is - VBZ
no - DT
sign - NN
of - IN
multiple - JJ
sclerosis - NN
. - .
Continue - VB
exercise - NN
for - IN
obesity - NN
and - CC
hypertension - NN
. - .


> BigTop/Hadoop cTAKES integration
> --------------------------------
>
>                 Key: CTAKES-314
>                 URL: https://issues.apache.org/jira/browse/CTAKES-314
>             Project: cTAKES
>          Issue Type: New Feature
>    Affects Versions: 3.2.0
>            Reporter: Pei Chen
>             Fix For: 3.2.3
>
>         Attachments: Napkin_cTAKES_Hadoop.JPG, fix.diff
>
>
> Placeholder to:
> Create a simple application that can take in different data sources (public forums, Twitter, etc.) and scale up cTAKES using the BigTop/Hadoop ecosystem.


--
This message was sent by Atlassian JIRA
(v6.3.4#6332)