ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From james-mas...@apache.org
Subject svn commit: r1550769 - /ctakes/sandbox/groovy/cTAKES-with-resources.groovy
Date Fri, 13 Dec 2013 16:55:39 GMT
Author: james-masanz
Date: Fri Dec 13 16:55:38 2013
New Revision: 1550769

URL: http://svn.apache.org/r1550769
Log:
work in progress, checking in so others can help debug. Need desc and org subtrees from groovy-temp-resources

Added:
    ctakes/sandbox/groovy/cTAKES-with-resources.groovy

Added: ctakes/sandbox/groovy/cTAKES-with-resources.groovy
URL: http://svn.apache.org/viewvc/ctakes/sandbox/groovy/cTAKES-with-resources.groovy?rev=1550769&view=auto
==============================================================================
--- ctakes/sandbox/groovy/cTAKES-with-resources.groovy (added)
+++ ctakes/sandbox/groovy/cTAKES-with-resources.groovy Fri Dec 13 16:55:38 2013
@@ -0,0 +1,386 @@
+#!/usr/bin/env groovy
+
+/**
+** 	This assumes that you have installed Groovy and 
+** 	that you have the command groovy available in your path. 
+** 	On Debian/Ubuntu systems, installing Groovy should be as easy as apt-get install groovy.
+** 	You can download groovy from http://groovy.codehaus.org/
+** 	The first run may be slow since it needs to download all of the dependencies.
+** 	Usage: $groovy cTAKES-with-resources.groovy [inputDir]
+** 	or, for more verbose status output: $groovy -Dgroovy.grape.report.downloads=true cTAKES-with-resources.groovy [inputDir]
+**/
+
+// @GrabResolver from Richard Eckart de Castilho, needed while using OpenNLP pre-1.5.3 version.
+// To fix issue with jwnl unresolved dependency
+@GrabResolver(name='opennlp.sf.net', 
+      root='http://opennlp.sourceforge.net/maven2')
+
+// Tried getting core-res first in hopes it will get added to classpath and that can avoid:
+//   Caused by: java.io.FileNotFoundException: org\apache\ctakes\core\sentdetect\sd-med-model.zip (The system cannot find the path specified)
+// but it didn't make a difference....
+@Grapes([	  
+
+ @Grab(group='org.scala-lang', module='scala-library', version='2.9.0'),
+ @Grab(group='org.scala-tools.sbinary', module='sbinary_2.9.0', version='0.4.0'),
+                        
+			
+// @Grab(group='org.apache.ctakes',
+//      module='ctakes-core-res',
+//            version='3.1.1'),
+			
+ @Grab(group='org.apache.ctakes',
+      module='ctakes-clinical-pipeline',
+            version='3.1.1'),
+			
+ //@Grab(group='net.sf.mastif', module='mastif-i2b2', version='1.4'),
+ //@Grab(group='net.sf.mastif', module='mastif-zoner', version='1.4'),
+ //@Grab(group='net.sf.carafe.jcarafe', module='jcarafe-ext_2.9.1', version='0.9.8.3.RC4'),
+ //@Grab(group='net.sf.carafe.jcarafe', module='jcarafe-core_2.9.1', version='0.9.8.3.RC4'),
+            
+                                               			
+								
+// @Grab(group='org.apache.ctakes',
+//      module='ctakes-dependency-parser-res',
+//            version='3.1.1'),
+			
+			
+ //<groupId>net.sourceforge.ctakesresources</groupId>
+ //<artifactId>ctakes-resources-umls2011ab</artifactId>
+ //<version>3.1.1</version>
+ @Grab(group='net.sourceforge.ctakesresources',
+      module='ctakes-resources-umls2011ab',
+            version='3.1.1')
+])
+
+import java.io.File;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.analysis_engine.AnalysisEngineDescription;
+import org.apache.uima.collection.CollectionReader;
+import org.apache.uima.resource.metadata.TypeSystemDescription;
+import org.cleartk.util.cr.FilesCollectionReader;
+import org.uimafit.factory.AnalysisEngineFactory;
+import org.uimafit.factory.AggregateBuilder;
+import org.uimafit.pipeline.SimplePipeline;	
+import org.uimafit.component.JCasAnnotator_ImplBase;
+import org.uimafit.component.xwriter.XWriter;
+import org.uimafit.factory.TypeSystemDescriptionFactory;
+import org.uimafit.factory.TypePrioritiesFactory;
+import static org.uimafit.util.JCasUtil.*;
+
+import org.apache.ctakes.typesystem.type.syntax.BaseToken;
+import org.apache.ctakes.typesystem.type.textspan.Segment;
+import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.ctakes.typesystem.type.syntax.TopTreebankNode;
+import org.apache.ctakes.core.resource.FileLocator;
+import org.apache.ctakes.core.ae.SentenceDetector;
+import org.apache.ctakes.core.ae.SimpleSegmentAnnotator;
+import org.apache.ctakes.core.ae.TokenizerAnnotatorPTB;
+import org.apache.ctakes.core.util.CtakesFileNamer;
+
+// Resolve and report the directory the script is run from; lib/ is looked up beneath it.
+File cwd = new File(".");
+String path = cwd.getCanonicalPath();
+System.out.println(path);
+
+
+// Append the jars expected under <cwd>/lib to Groovy's root class loader.
+// The URL is derived with File.toURI().toURL() rather than by concatenating
+// "file:///" with the path, so separators and special characters in the
+// working directory (spaces, '%', '#') are escaped correctly on every platform.
+// NOTE(review): these jars are only downloaded into lib/ further down in this script,
+// so on a first run they do not exist yet when registered here -- confirm the class
+// loader still picks them up, or move the lib/ downloads ahead of this point.
+File libDir = new File(path, "lib");
+// modelName stays declared at script level because later statements assign to it.
+String modelName;
+for (String jarName : ["med-facts-i2b2-1.2-SNAPSHOT.jar",
+                       "med-facts-zoner-1.1.jar",
+                       "jcarafe-ext_2.9.1-0.9.8.3.RC4.jar",
+                       "jcarafe-core_2.9.1-0.9.8.3.RC4.jar"]) {
+	modelName = jarName;
+	this.class.classLoader.rootLoader.addURL( new File(libDir, modelName).toURI().toURL() );
+}
+
+
+// The input directory is a mandatory first argument; stop with exit status 1 without it.
+if (args.length == 0) {
+	System.out.println("Please specify input directory");
+	System.exit(1);
+}
+
+// Debugging leftover, kept for reference:
+//scala.ScalaObject o = new scala.ScalaObject();
+//System.out.println("HERE! I was able to get an error about abstract interface 'scala.ScalaObject' so it is findable here");
+
+String inputDirectory = args[0];
+System.out.println("Reading from directory: " + inputDirectory);
+
+// Reader handed to SimplePipeline.runPipeline at the end of the script.
+CollectionReader collectionReader = FilesCollectionReader.getCollectionReader(inputDirectory);
+
+		//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+		// Start of section to be replaced/deleted once get resolving to models (jars and zips) to work properly from groovy//
+		//Download Models
+		//TODO: Separate downloads from URL here is a hack.
+		//Models should really be automatically downloaded from
+		//maven central as part of ctakes-*-res projects/artifacts via @grab.
+		String ctakesRepoUrl = "http://svn.apache.org/repos/asf/ctakes/trunk/";
+
+		// One row per model file: [svn module, directory (identical remotely and locally), file name].
+		// The remote URL is <ctakesRepoUrl><module>/src/main/resources/<directory><file>;
+		// the file is stored at <directory><file> relative to the working directory.
+		// Rows are processed in the order listed.
+		def modelFiles = [
+			["ctakes-core-res",                "org/apache/ctakes/core/sentdetect/",               "sd-med-model.zip"],
+			["ctakes-constituency-parser-res", "org/apache/ctakes/constituency/parser/models/",    "sharpacq-3.1.bin"],
+			["ctakes-pos-tagger-res",          "org/apache/ctakes/postagger/models/",              "mayo-pos.zip"],
+			["ctakes-dependency-parser-res",   "org/apache/ctakes/dependency/parser/models/pred/", "mayo-en-pred-1.3.0.jar"],
+			["ctakes-dependency-parser-res",   "org/apache/ctakes/dependency/parser/models/role/", "mayo-en-role-1.3.0.jar"],
+			["ctakes-dependency-parser-res",   "org/apache/ctakes/dependency/parser/models/srl/",  "mayo-en-srl-1.3.0.jar"],
+			["ctakes-chunker-res",             "org/apache/ctakes/chunker/models/",                "chunker-model.zip"],
+			["ctakes-assertion-res",           "org/apache/ctakes/assertion/models/",              "i2b2.model"],
+			["ctakes-assertion-res",           "org/apache/ctakes/assertion/models/",              "cue.model"],
+			["ctakes-assertion-res",           "org/apache/ctakes/assertion/models/",              "scope.model"],
+			["ctakes-assertion-res",           "org/apache/ctakes/assertion/models/",              "pos.model"],
+			["ctakes-assertion-res",           "org/apache/ctakes/assertion/models/",              "featureFile11b"],
+			["ctakes-assertion-res",           "org/apache/ctakes/assertion/models/",              "generic.txt"],
+			["ctakes-assertion-res",           "org/apache/ctakes/assertion/models/",              "history.txt"],
+			["ctakes-assertion-res",           "org/apache/ctakes/assertion/models/",              "polarity.txt"],
+			["ctakes-assertion-res",           "org/apache/ctakes/assertion/models/",              "uncertainty.txt"],
+			// sharpPolarityFrags.txt
+			// sharpUncertaintyFrags.txt
+			["ctakes-dictionary-lookup-res",   "org/apache/ctakes/dictionary/lookup/",             "LookupDesc_Db.xml"]
+		];
+		for (row in modelFiles) {
+			String relativeDir = row[1];
+			String relativeName = relativeDir + row[2];
+			new File(relativeDir).mkdirs();
+			downloadFile(ctakesRepoUrl + row[0] + "/src/main/resources/" + relativeName, relativeName);
+		}
+
+		// get some jars that can't get using grapes/@grab
+		// (tried using net.sf.mastif grapes but dep errors....)
+		def libJars = [
+			"jcarafe-core_2.9.1-0.9.8.3.RC4.jar",
+			"jcarafe-ext_2.9.1-0.9.8.3.RC4.jar",
+			"med-facts-zoner-1.1.jar",
+			"med-facts-i2b2-1.2-SNAPSHOT.jar"
+		];
+		for (String jarName : libJars) {
+			String relativeName = "lib/" + jarName;
+			new File("lib/").mkdirs();
+			downloadFile(ctakesRepoUrl + "ctakes-assertion/" + relativeName, relativeName);
+		}
+
+		System.err.println("TODO YET - DEAL WITH GETTING rxnorm_index");
+		System.err.println("TODO YET - DEAL WITH GETTING OrangeBook");
+
+		// Dependency-parser jars are fetched from the ctakes-3.1.1 tag rather than from trunk.
+		String depModelsDir = "org/apache/ctakes/dependency/parser/models/";
+		String dictionaryJar = "dictionary-1.3.1.jar";
+		String mayoEnDepJar = "mayo-en-dep-1.3.0.jar";
+
+		for (String subDir : ["dependency", "lemmatizer"]) {
+			File targetDir = new File(depModelsDir + subDir);
+			System.out.println("Creating dirs for " + targetDir.getAbsolutePath());
+			targetDir.mkdirs();
+		}
+
+		String modelsURL = "https://svn.apache.org/repos/asf/ctakes/tags/ctakes-3.1.1/ctakes-dependency-parser-res/src/main/resources/org/apache/ctakes/dependency/parser/models/"
+		System.out.println("Downloading resources not available separately from maven central: " + dictionaryJar);
+		downloadFile(modelsURL + "lemmatizer/" + dictionaryJar, depModelsDir + "lemmatizer/" + dictionaryJar);
+		downloadFile(modelsURL + "dependency/" + mayoEnDepJar, depModelsDir + "dependency/" + mayoEnDepJar);
+
+		//   End of section to be replaced/deleted once get resolving to models (jars and zips) to work properly from groovy//
+		//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+		
+		
+		
+
+		// Assemble the aggregate: the UMLS clinical pipeline descriptor first, then an XWriter.
+		AggregateBuilder pipeline = new AggregateBuilder();
+
+		// Note, do not include .xml in the descriptor name here
+		pipeline.add(AnalysisEngineFactory.createAnalysisEngineDescription(
+			"desc/analysis_engine/AggregatePlaintextUMLSProcessor"));
+
+		// Earlier hand-assembled pipeline, kept for reference:
+		//aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(SimpleSegmentAnnotator.class));
+		//aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
+            //SentenceDetector.class,
+            //SentenceDetector.SD_MODEL_FILE_PARAM,
+            //"sd-med-model.zip"));
+		//aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(TokenizerAnnotatorPTB.class));
+		//aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(
+			//ConstituencyParser.class,
+			//ConstituencyParser.PARAM_MODELFILE,
+            //"sharpacq-3.1.bin"));
+		//aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(Writer.class));
+
+		// XWriter is configured with the cTAKES type system, the "output-dir" directory
+		// parameter and CtakesFileNamer as its file namer class.
+		pipeline.add(AnalysisEngineFactory.createPrimitiveDescription(
+			XWriter.class,
+			TypeSystemDescriptionFactory.createTypeSystemDescription("org.apache.ctakes.typesystem.types.TypeSystem"),
+			XWriter.PARAM_OUTPUT_DIRECTORY_NAME, "output-dir",
+			XWriter.PARAM_FILE_NAMER_CLASS_NAME, CtakesFileNamer.class.getName()));
+
+		// Run the reader created from the input directory through the aggregate.
+		AnalysisEngineDescription aggregate = pipeline.createAggregate();
+		SimplePipeline.runPipeline(collectionReader, aggregate);
+
+
+// Minimal annotator: process() only prints a fixed notice to standard output
+// and does not inspect the JCas it is given.
+class Writer extends JCasAnnotator_ImplBase {
+  @Override
+  public void process(JCas jcas) {
+    System.out.println("Commented out most of process() for Writer");
+  }
+}
+
+/**
+ * Downloads the resource at {@code url} into the file {@code filename}, unless that
+ * file already exists, in which case nothing is fetched.
+ *
+ * Both the network stream and the file stream are closed even when the transfer
+ * fails. A partially written file is deleted on failure, so that a later run does
+ * not mistake a truncated download for a finished one via the exists() check.
+ *
+ * @param url      location to read from; must be parseable by java.net.URL
+ * @param filename destination path; its parent directory must already exist
+ * @throws IOException if the URL cannot be opened or the file cannot be written
+ */
+def downloadFile(String url, String filename) {
+	System.out.println("Downloading: " + url);
+	def file = new File(filename);
+	System.out.println("Saving as " + file.getAbsolutePath());
+	if(file.exists()) {
+	  System.out.println("File already exists:" + filename);
+	  return;
+	}
+	boolean complete = false;
+	try {
+		// withInputStream / withOutputStream close their streams when the closure exits,
+		// whether normally or by exception.
+		new URL(url).withInputStream { input ->
+			file.withOutputStream { output -> output << input }
+		}
+		complete = true;
+	} finally {
+		if (!complete) {
+			// Remove whatever was written so the next run retries the download.
+			file.delete();
+		}
+	}
+}
+



Mime
View raw message